diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d62bbac..0000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, -and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by -the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all -other entities that control, are controlled by, or are under common -control with that entity. For the purposes of this definition, -"control" means (i) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or -otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity -exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, -including but not limited to software source code, documentation -source, and configuration files. - -"Object" form shall mean any form resulting from mechanical -transformation or translation of a Source form, including but -not limited to compiled object code, generated documentation, -and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or -Object form, made available under the License, as indicated by a -copyright notice that is included in or attached to the work -(an example is provided in the Appendix below). 
- -"Derivative Works" shall mean any work, whether in Source or Object -form, that is based on (or derived from) the Work and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. For the purposes -of this License, Derivative Works shall not include works that remain -separable from, or merely link (or bind by name) to the interfaces of, -the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including -the original version of the Work and any modifications or additions -to that Work or Derivative Works thereof, that is intentionally -submitted to Licensor for inclusion in the Work by the copyright owner -or by an individual or Legal Entity authorized to submit on behalf of -the copyright owner. For the purposes of this definition, "submitted" -means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, -and issue tracking systems that are managed by, or on behalf of, the -Licensor for the purpose of discussing and improving the Work, but -excluding communication that is conspicuously marked or otherwise -designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity -on behalf of whom a Contribution has been received by Licensor and -subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the -Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -(except as stated in this section) patent license to make, have made, -use, offer to sell, sell, import, and otherwise transfer the Work, -where such license applies only to those patent claims licensable -by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) -with the Work to which such Contribution(s) was submitted. If You -institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work -or a Contribution incorporated within the Work constitutes direct -or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate -as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the -Work or Derivative Works thereof in any medium, with or without -modifications, and in Source or Object form, provided that You -meet the following conditions: - -(a) You must give any other recipients of the Work or -Derivative Works a copy of this License; and - -(b) You must cause any modified files to carry prominent notices -stating that You changed the files; and - -(c) You must retain, in the Source form of any Derivative Works -that You distribute, all copyright, patent, trademark, and -attribution notices from the Source form of the Work, -excluding those notices that do not pertain to any part of -the Derivative Works; and - -(d) If the Work includes a "NOTICE" text file as part of its -distribution, then any Derivative Works that You distribute must -include a readable copy of the attribution notices contained -within such NOTICE file, excluding those notices that do not -pertain to any part of the Derivative Works, in at least one -of the following places: within a 
NOTICE text file distributed -as part of the Derivative Works; within the Source form or -documentation, if provided along with the Derivative Works; or, -within a display generated by the Derivative Works, if and -wherever such third-party notices normally appear. The contents -of the NOTICE file are for informational purposes only and -do not modify the License. You may add Your own attribution -notices within Derivative Works that You distribute, alongside -or as an addendum to the NOTICE text from the Work, provided -that such additional attribution notices cannot be construed -as modifying the License. - -You may add Your own copyright statement to Your modifications and -may provide additional or different license terms and conditions -for use, reproduction, or distribution of Your modifications, or -for any such Derivative Works as a whole, provided Your use, -reproduction, and distribution of the Work otherwise complies with -the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, -any Contribution intentionally submitted for inclusion in the Work -by You to the Licensor shall be under the terms and conditions of -this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify -the terms of any separate license agreement you may have executed -with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade -names, trademarks, service marks, or product names of the Licensor, -except as required for reasonable and customary use in describing the -origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or -agreed to in writing, Licensor provides the Work (and each -Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied, including, without limitation, any warranties or conditions -of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any -risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, -whether in tort (including negligence), contract, or otherwise, -unless required by applicable law (such as deliberate and grossly -negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, -incidental, or consequential damages of any character arising as a -result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, -work stoppage, computer failure or malfunction, or any and all -other commercial damages or losses), even if such Contributor -has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing -the Work or Derivative Works thereof, You may choose to offer, -and charge a fee for, acceptance of support, warranty, indemnity, -or other liability obligations and/or rights consistent with this -License. However, in accepting such obligations, You may act only -on Your own behalf and on Your sole responsibility, not on behalf -of any other Contributor, and only if You agree to indemnify, -defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason -of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following -boilerplate notice, with the fields enclosed by brackets "[]" -replaced with your own identifying information. (Don't include -the brackets!) The text should be enclosed in the appropriate -comment syntax for the file format. We also recommend that a -file or class name and description of purpose be included on the -same "printed page" as the copyright notice for easier -identification within third-party archives. - -Copyright 2026 VibeMouse Contributors - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/README.md b/README.md index a439eaa..b97c32e 100644 --- a/README.md +++ b/README.md @@ -1,238 +1,17 @@ -# VibeMouse +# OpenClaw STT Module -Mouse-side-button voice input for VibeCoding. +This repository now contains only a single Speech-to-Text module for OpenClaw. -中文文档:[`README.zh-CN.md`](./README.zh-CN.md) +- Plugin path: `openclaw-stt-plugin` +- Tool name: `stt_transcribe` +- Purpose: record or read audio, transcribe to text, and return text to OpenClaw -AI adaptation guides: -- English: [`docs/AI_ASSISTANT_DEPLOYMENT.md`](./docs/AI_ASSISTANT_DEPLOYMENT.md) -- 中文:[`docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md`](./docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md) +Quick setup: -## What This Project Does +1. `cd openclaw-stt-plugin` (run from the repository root) +2.
`bash install_local.sh` -VibeMouse binds your coding speech workflow to mouse side buttons: -- Front side button: start/stop recording -- Rear side button while idle: send Enter -- Rear side button while recording: stop recording and route transcript to OpenClaw +Quick test: -Core goals are low friction, stable daily use, and graceful fallback when any subsystem fails. - -## Runtime Architecture (Core) - -The runtime is event-driven and split by responsibility: - -1. `vibemouse/main.py` - - CLI entry (`run` / `doctor`) -2. `vibemouse/app.py` - - Orchestrates button events, recording state, transcription workers, and final output routing -3. `vibemouse/mouse_listener.py` - - Captures side buttons and gestures (`evdev` first, fallback path available) -4. `vibemouse/audio.py` - - Records audio to temp WAV -5. `vibemouse/transcriber.py` - - SenseVoice backend selection and transcription -6. `vibemouse/output.py` - - Text typing / clipboard / OpenClaw dispatch, with fallback and reason tracking -7. `vibemouse/system_integration.py` - - Platform adapter boundary (Hyprland now, Windows/macOS extension points prepared) -8. `vibemouse/doctor.py` - - Built-in diagnostics for env, OpenClaw, input permissions, and known conflicts - -## Quick Start (Linux) - -### Ubuntu / Debian packages - -```bash -sudo apt update -sudo apt install -y python3-gi gir1.2-atspi-2.0 portaudio19-dev libsndfile1 -``` - -### Arch packages - -```bash -sudo pacman -Syu --needed python python-pip python-gobject portaudio libsndfile -``` - -### Install - -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -U pip -pip install -e . -``` - -### Run - -```bash -export VIBEMOUSE_BACKEND=auto -export VIBEMOUSE_DEVICE=cpu -vibemouse -``` - -### One-command auto deploy (recommended) - -```bash -bash scripts/auto-deploy.sh --preset stable -``` - -This command bootstraps `.venv`, installs VibeMouse, generates service/env files, -enables `systemd --user` service, and runs `vibemouse doctor`. 
- -Available presets: -- `stable`: balanced daily-driver defaults -- `fast`: lower debounce + higher OpenClaw retries -- `low-resource`: lower background footprint defaults - -Examples: - -```bash -# High reliability profile -bash scripts/auto-deploy.sh --preset stable - -# Keep resources low -bash scripts/auto-deploy.sh --preset low-resource - -# Custom OpenClaw target assistant -bash scripts/auto-deploy.sh --preset stable --openclaw-agent ops -``` - -## Default Mapping and State Logic - -- `VIBEMOUSE_FRONT_BUTTON` default: `x1` -- `VIBEMOUSE_REAR_BUTTON` default: `x2` - -State matrix: -- Idle + rear press -> Enter (`VIBEMOUSE_ENTER_MODE`) -- Recording + rear press -> stop recording + OpenClaw dispatch - -If your hardware labels are reversed: - -```bash -export VIBEMOUSE_FRONT_BUTTON=x2 -export VIBEMOUSE_REAR_BUTTON=x1 -``` - -## OpenClaw Integration (Core) - -OpenClaw route is explicit and configurable: -- `VIBEMOUSE_OPENCLAW_COMMAND` (default `openclaw`) -- `VIBEMOUSE_OPENCLAW_AGENT` (default `main`) -- `VIBEMOUSE_OPENCLAW_TIMEOUT_S` (default `20.0`) -- `VIBEMOUSE_OPENCLAW_RETRIES` (default `0`) - -Dispatch behavior: -- Fast fire-and-forget spawn to avoid blocking UI interaction -- Route result includes reason (`dispatched`, `dispatched_after_retry_*`, `spawn_error:*`, etc.) -- Clipboard fallback if command is invalid or spawn fails - -Deployment tip: if you run your own local assistant setup, set -`VIBEMOUSE_OPENCLAW_AGENT` to your own assistant ID. 
- -## Built-in Doctor - -Run diagnostics: - -```bash -vibemouse doctor -``` - -Apply safe auto-fixes first, then re-check: - -```bash -vibemouse doctor --fix -``` - -Current checks include: -- Config load validity -- OpenClaw command resolution + agent existence -- Microphone input availability -- Linux input device permissions / side-button capability -- Hyprland rear-button Return bind conflicts -- `systemctl --user` service activity - -Current auto-fixes (`--fix`) include: -- Auto-disable conflicting Hyprland side-button Return binds -- Attempt to restart inactive `vibemouse.service` - -Exit code is non-zero when any `FAIL` check exists. - -## Deploy Command - -The deploy command is scriptable and can be used directly: - -```bash -vibemouse deploy --preset stable -``` - -Useful flags: -- `--preset stable|fast|low-resource` -- `--openclaw-command "openclaw --profile prod"` -- `--openclaw-agent main` -- `--openclaw-retries 2` -- `--skip-systemctl` -- `--dry-run` - -## Frequently Used Variables - -| Variable | Default | Purpose | -|---|---|---| -| `VIBEMOUSE_ENTER_MODE` | `enter` | Rear-button submit mode (`enter`, `ctrl_enter`, `shift_enter`, `none`) | -| `VIBEMOUSE_AUTO_PASTE` | `false` | Auto paste when route falls back to clipboard | -| `VIBEMOUSE_GESTURES_ENABLED` | `false` | Enable gesture recognition | -| `VIBEMOUSE_GESTURE_TRIGGER_BUTTON` | `rear` | Gesture trigger (`front`, `rear`, `right`) | -| `VIBEMOUSE_GESTURE_THRESHOLD_PX` | `120` | Gesture movement threshold | -| `VIBEMOUSE_GESTURE_FREEZE_POINTER` | `true` | Freeze pointer during gesture capture | -| `VIBEMOUSE_PREWARM_ON_START` | `true` | Preload ASR on startup to reduce first-use latency | -| `VIBEMOUSE_PREWARM_DELAY_S` | `0.0` | Delay ASR prewarm after startup to improve initial responsiveness | -| `VIBEMOUSE_STATUS_FILE` | `$XDG_RUNTIME_DIR/vibemouse-status.json` | Runtime status for bars/widgets | - -Full configuration source of truth: `vibemouse/config.py`. 
- -## Troubleshooting Shortlist - -### Rear button still sends Enter while recording - -Check Hyprland-level hard bind conflict in -`~/.config/hypr/UserConfigs/UserKeybinds.conf` and remove lines like: - -```ini -bind = , mouse:275, sendshortcut, , Return, activewindow -bind = , mouse:276, sendshortcut, , Return, activewindow -``` - -Then reload: - -```bash -hyprctl reload config-only -``` - -### OpenClaw route not working - -```bash -openclaw agent --agent main --message "ping" --json -vibemouse doctor -``` - -### Side button not detected on Linux - -```bash -sudo usermod -aG input $USER -# relogin required -``` - -## For AI Assistants and Platform Adapters - -Use this guide when adapting to Windows/macOS or custom environments: - -- [`docs/AI_ASSISTANT_DEPLOYMENT.md`](./docs/AI_ASSISTANT_DEPLOYMENT.md) -- [`docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md`](./docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md) - -It contains architecture contracts, dependency download links, adaptation workflow, -and a prompt template for autonomous platform adaptation. - -## License - -Source code is licensed under Apache-2.0. See `LICENSE`. - -Third-party and model asset notices: `THIRD_PARTY_NOTICES.md`. +1. `openclaw plugins info openclaw-stt` +2. `openclaw plugins doctor` diff --git a/README.zh-CN.md b/README.zh-CN.md deleted file mode 100644 index ef6c9ce..0000000 --- a/README.zh-CN.md +++ /dev/null @@ -1,236 +0,0 @@ -# VibeMouse - -面向 VibeCoding 的鼠标侧键语音输入工具。 - -English README: [`README.md`](./README.md) - -AI 适配指南: -- English: [`docs/AI_ASSISTANT_DEPLOYMENT.md`](./docs/AI_ASSISTANT_DEPLOYMENT.md) -- 中文:[`docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md`](./docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md) - -## 这个项目解决什么问题 - -VibeMouse 把高频语音工作流绑定到鼠标侧键: -- 前侧键:开始 / 结束录音 -- 空闲态按后侧键:发送 Enter -- 录音态按后侧键:停止录音并把转写发送到 OpenClaw - -核心目标是低摩擦、可日常稳定使用,并且每个环节失败时都有回退路径。 - -## 运行架构(核心) - -整体是事件驱动,按职责拆分: - -1. `vibemouse/main.py` - - CLI 入口(`run` / `doctor`) -2. `vibemouse/app.py` - - 编排按钮事件、录音状态、转写线程和输出路由 -3. 
`vibemouse/mouse_listener.py` - - 监听侧键与手势(优先 `evdev`,含回退) -4. `vibemouse/audio.py` - - 录音并写入临时 WAV -5. `vibemouse/transcriber.py` - - SenseVoice 后端选择与识别 -6. `vibemouse/output.py` - - 输入 / 剪贴板 / OpenClaw 路由与失败回退 -7. `vibemouse/system_integration.py` - - 平台适配边界(当前 Hyprland,可扩展 Windows/macOS) -8. `vibemouse/doctor.py` - - 内置自检(环境、OpenClaw、输入权限、冲突绑定) - -## 快速开始(Linux) - -### Ubuntu / Debian 依赖 - -```bash -sudo apt update -sudo apt install -y python3-gi gir1.2-atspi-2.0 portaudio19-dev libsndfile1 -``` - -### Arch 依赖 - -```bash -sudo pacman -Syu --needed python python-pip python-gobject portaudio libsndfile -``` - -### 安装 - -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -U pip -pip install -e . -``` - -### 运行 - -```bash -export VIBEMOUSE_BACKEND=auto -export VIBEMOUSE_DEVICE=cpu -vibemouse -``` - -### 一键自动部署(推荐) - -```bash -bash scripts/auto-deploy.sh --preset stable -``` - -这个命令会自动完成 `.venv` 初始化、安装 VibeMouse、生成 service/env 文件、 -启用 `systemd --user` 服务并执行 `vibemouse doctor`。 - -可选预设: -- `stable`:日常稳定均衡 -- `fast`:更低去抖 + 更高 OpenClaw 重试 -- `low-resource`:更低后台资源占用 - -示例: - -```bash -# 稳定档 -bash scripts/auto-deploy.sh --preset stable - -# 低资源档 -bash scripts/auto-deploy.sh --preset low-resource - -# 指定你自己的 OpenClaw 助手 -bash scripts/auto-deploy.sh --preset stable --openclaw-agent ops -``` - -## 默认映射与状态逻辑 - -- `VIBEMOUSE_FRONT_BUTTON` 默认:`x1` -- `VIBEMOUSE_REAR_BUTTON` 默认:`x2` - -状态矩阵: -- 空闲 + 后侧键 -> Enter(由 `VIBEMOUSE_ENTER_MODE` 控制) -- 录音中 + 后侧键 -> 停止录音 + OpenClaw 路由 - -如果鼠标物理定义相反: - -```bash -export VIBEMOUSE_FRONT_BUTTON=x2 -export VIBEMOUSE_REAR_BUTTON=x1 -``` - -## OpenClaw 集成(核心) - -OpenClaw 路由可配置: -- `VIBEMOUSE_OPENCLAW_COMMAND`(默认 `openclaw`) -- `VIBEMOUSE_OPENCLAW_AGENT`(默认 `main`) -- `VIBEMOUSE_OPENCLAW_TIMEOUT_S`(默认 `20.0`) -- `VIBEMOUSE_OPENCLAW_RETRIES`(默认 `0`) - -调度行为: -- 快速非阻塞派发,避免阻塞交互 -- 返回路由原因(如 `dispatched`、`dispatched_after_retry_*`、`spawn_error:*`) -- 命令无效或拉起失败时自动回退到剪贴板 - -部署提示:如果你用自己的本地 AI 助手体系,把 -`VIBEMOUSE_OPENCLAW_AGENT` 改成你自己的助手 
ID。 - -## 内置自检 Doctor - -运行: - -```bash -vibemouse doctor -``` - -先执行安全自动修复再复检: - -```bash -vibemouse doctor --fix -``` - -当前检查项: -- 配置加载是否有效 -- OpenClaw 命令是否可执行 + agent 是否存在 -- 麦克风输入设备可用性 -- Linux 输入设备权限 / 侧键能力 -- Hyprland 后侧键 Return 冲突绑定 -- `systemctl --user` 服务状态 - -当前 `--fix` 自动修复项: -- 自动禁用冲突的 Hyprland 侧键 Return 绑定 -- 尝试拉起处于 inactive 状态的 `vibemouse.service` - -只要存在 `FAIL`,命令退出码就是非零,方便自动化检测。 - -## Deploy 命令 - -也可以直接用 deploy 子命令: - -```bash -vibemouse deploy --preset stable -``` - -常用参数: -- `--preset stable|fast|low-resource` -- `--openclaw-command "openclaw --profile prod"` -- `--openclaw-agent main` -- `--openclaw-retries 2` -- `--skip-systemctl` -- `--dry-run` - -## 常用配置项 - -| 变量 | 默认值 | 作用 | -|---|---|---| -| `VIBEMOUSE_ENTER_MODE` | `enter` | 后侧键提交模式(`enter`、`ctrl_enter`、`shift_enter`、`none`) | -| `VIBEMOUSE_AUTO_PASTE` | `false` | 回退到剪贴板后是否自动粘贴 | -| `VIBEMOUSE_GESTURES_ENABLED` | `false` | 是否启用手势识别 | -| `VIBEMOUSE_GESTURE_TRIGGER_BUTTON` | `rear` | 手势触发键(`front`、`rear`、`right`) | -| `VIBEMOUSE_GESTURE_THRESHOLD_PX` | `120` | 手势识别阈值 | -| `VIBEMOUSE_GESTURE_FREEZE_POINTER` | `true` | 手势期间是否冻结指针 | -| `VIBEMOUSE_PREWARM_ON_START` | `true` | 启动预热,降低首次识别延迟 | -| `VIBEMOUSE_PREWARM_DELAY_S` | `0.0` | 启动后延迟执行 ASR 预热,改善初始响应速度 | -| `VIBEMOUSE_STATUS_FILE` | `$XDG_RUNTIME_DIR/vibemouse-status.json` | 运行状态文件(状态栏读取) | - -完整配置以 `vibemouse/config.py` 为准。 - -## 故障排查(短版) - -### 录音时后侧键仍然发送回车 - -检查并移除 Hyprland 的硬绑定: - -```ini -bind = , mouse:275, sendshortcut, , Return, activewindow -bind = , mouse:276, sendshortcut, , Return, activewindow -``` - -然后重载: - -```bash -hyprctl reload config-only -``` - -### OpenClaw 路由异常 - -```bash -openclaw agent --agent main --message "ping" --json -vibemouse doctor -``` - -### Linux 下侧键监听不到 - -```bash -sudo usermod -aG input $USER -# 需要重新登录 -``` - -## 给 AI 助手做平台适配 - -请直接看这两份专用指南: - -- [`docs/AI_ASSISTANT_DEPLOYMENT.md`](./docs/AI_ASSISTANT_DEPLOYMENT.md) -- [`docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md`](./docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md) - 
-里面包含:架构契约、依赖下载地址、平台适配流程、以及可直接复用的 AI 提示模板。 - -## License - -项目源码采用 Apache-2.0,详见 `LICENSE`。 - -第三方依赖与模型资产声明见 `THIRD_PARTY_NOTICES.md`。 diff --git a/THIRD_PARTY_NOTICES.md b/THIRD_PARTY_NOTICES.md deleted file mode 100644 index fe676cc..0000000 --- a/THIRD_PARTY_NOTICES.md +++ /dev/null @@ -1,71 +0,0 @@ -# Third-Party Notices - -This document summarizes third-party components used by VibeMouse and their -declared licenses. - -Last reviewed: 2026-03-01. - -## 1) Project License - -VibeMouse source code is licensed under Apache-2.0. See `LICENSE`. - -## 2) Direct Python Dependencies - -The following are direct runtime dependencies declared in `pyproject.toml`. - -| Package | Declared License | Upstream | -|---|---|---| -| `numpy` | BSD-3-Clause | https://numpy.org | -| `sounddevice` | MIT | https://python-sounddevice.readthedocs.io/ | -| `soundfile` | BSD-3-Clause | https://github.com/bastibe/python-soundfile | -| `pynput` | LGPL-3.0 (or later) | https://github.com/moses-palmer/pynput | -| `evdev` | BSD-3-Clause | https://github.com/gvalkov/python-evdev | -| `PyGObject` | LGPL-2.1 (or later) | https://pygobject.gnome.org | -| `pyperclip` | BSD | https://github.com/asweigart/pyperclip | -| `funasr` | MIT | https://github.com/modelscope/FunASR | -| `funasr-onnx` | MIT | https://pypi.org/project/funasr-onnx/ | -| `onnxruntime` | MIT | https://github.com/microsoft/onnxruntime | -| `openvino` | Apache-2.0 | https://github.com/openvinotoolkit/openvino | -| `modelscope` | Apache-2.0 | https://github.com/modelscope/modelscope | - -Notes: - -- `pynput` and `PyGObject` are LGPL-licensed. If you redistribute packaged - binaries, ensure LGPL obligations are satisfied (license notice, relinking - conditions where applicable, and source availability requirements for the - LGPL-covered components). -- Transitive dependencies are not exhaustively listed here. They remain subject - to their own licenses. 
- -## 3) Model Assets and Weights - -VibeMouse defaults to model IDs: - -- `iic/SenseVoiceSmall` -- `iic/SenseVoiceSmall-onnx` - -At review time, ModelScope API metadata reports both model IDs as -`Apache License 2.0`. - -References: - -- https://www.modelscope.cn/api/v1/models/iic/SenseVoiceSmall -- https://www.modelscope.cn/api/v1/models/iic/SenseVoiceSmall-onnx - -Important caveat: - -- The FunASR repository also contains a model-specific `MODEL_LICENSE` with - additional terms for "FunASR Software" weights: - https://raw.githubusercontent.com/modelscope/FunASR/main/MODEL_LICENSE -- If you switch to other model IDs, mirror-hosted weights, or bundled model - artifacts, re-verify the exact model license/terms before redistribution. - -## 4) Attribution and Compliance Guidance - -When distributing VibeMouse (source or binaries): - -1. Keep `LICENSE` and this notice file. -2. Preserve upstream copyright and license notices for bundled components. -3. Re-check model card licenses when changing model IDs or revisions. -4. Do not assume all speech models under the same ecosystem share identical - license terms. diff --git a/docs/AI_ASSISTANT_DEPLOYMENT.md b/docs/AI_ASSISTANT_DEPLOYMENT.md deleted file mode 100644 index 4d8e74e..0000000 --- a/docs/AI_ASSISTANT_DEPLOYMENT.md +++ /dev/null @@ -1,211 +0,0 @@ -# VibeMouse AI Assistant Deployment & Adaptation Guide - -This guide is for AI assistants (and engineers using AI assistants) to deploy VibeMouse on a new machine and adapt it to a new platform safely. - -Use this as the source-of-truth playbook when adding Windows/macOS support or custom desktop integration. - -## 1) Project Goal and Non-Negotiable Behavior - -VibeMouse is a side-button voice workflow tool. 
- -Required behavior: -- Front side button: start/stop recording -- Rear side button when idle: send Enter -- Rear side button while recording: stop recording and dispatch transcript to OpenClaw -- Fallbacks must preserve user output (never silently lose text) - -Do not break this state machine while adapting platforms. - -## 2) Core Architecture Map - -Key modules: -- `vibemouse/main.py`: CLI entry (`run`, `doctor`) -- `vibemouse/app.py`: runtime orchestration + state machine + worker lifecycle -- `vibemouse/mouse_listener.py`: side-button capture + gesture path -- `vibemouse/audio.py`: microphone recording -- `vibemouse/transcriber.py`: ASR backend selection/transcription -- `vibemouse/output.py`: text output routing + OpenClaw dispatch + fallback -- `vibemouse/system_integration.py`: platform adapter boundary -- `vibemouse/doctor.py`: environment and runtime diagnostics -- `vibemouse/config.py`: env config contract - -## 3) Platform Adaptation Boundary (Most Important) - -When adapting a platform, implement/extend `SystemIntegration` in `vibemouse/system_integration.py`. - -Methods used by runtime: -- `is_hyprland` -- `send_shortcut(mod, key)` -- `active_window()` -- `cursor_position()` -- `move_cursor(x, y)` -- `switch_workspace(direction)` -- `is_text_input_focused()` -- `send_enter_via_accessibility()` -- `is_terminal_window_active()` -- `paste_shortcuts(terminal_active)` - -Rule: add platform-specific behavior here first; avoid spreading platform logic across `app.py` and `output.py`. 
- -## 4) Dependencies and Download Sources - -### Required foundations -- Python 3.10+: https://www.python.org/downloads/ -- pip: https://pip.pypa.io/en/stable/installation/ - -### Runtime and audio -- PortAudio: http://www.portaudio.com/download.html -- libsndfile: https://github.com/libsndfile/libsndfile -- `sounddevice`: https://pypi.org/project/sounddevice/ -- `soundfile`: https://pypi.org/project/soundfile/ - -### Input and desktop integration -- `pynput`: https://pypi.org/project/pynput/ -- `evdev` (Linux): https://python-evdev.readthedocs.io/en/latest/ -- PyGObject / AT-SPI: https://pygobject.gnome.org/ - -### ASR and model stack -- FunASR: https://pypi.org/project/funasr/ -- FunASR ONNX: https://pypi.org/project/funasr-onnx/ -- ONNX Runtime: https://pypi.org/project/onnxruntime/ -- OpenVINO: https://pypi.org/project/openvino/ -- ModelScope: https://pypi.org/project/modelscope/ - -### OpenClaw integration target -- OpenClaw repo: https://github.com/openclaw/openclaw - -The project’s pinned Python dependencies are defined in `pyproject.toml`. - -## 5) Deployment Procedure (Assistant-Executable) - -Fastest path (recommended): - -```bash -bash scripts/auto-deploy.sh --preset stable -``` - -Preset choices: `stable`, `fast`, `low-resource`. - -Direct command alternative: - -```bash -vibemouse deploy --preset stable -``` - -1. Clone and install -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -U pip -pip install -e . -``` - -2. Run diagnostics first -```bash -vibemouse doctor -vibemouse doctor --fix -``` - -3. Ensure OpenClaw route works -```bash -openclaw agent --agent main --message "ping" --json -``` - -4. Start runtime -```bash -vibemouse -``` - -5. 
Validate behavior matrix manually -- idle + rear -> Enter -- recording + rear -> OpenClaw dispatch - -## 6) Service Deployment (Linux user service) - -Recommended user service file location: -- `~/.config/systemd/user/vibemouse.service` - -Minimum lifecycle commands: -```bash -systemctl --user daemon-reload -systemctl --user enable --now vibemouse.service -systemctl --user status vibemouse.service -``` - -## 7) Environment Contract (Critical Variables) - -OpenClaw: -- `VIBEMOUSE_OPENCLAW_COMMAND` -- `VIBEMOUSE_OPENCLAW_AGENT` -- `VIBEMOUSE_OPENCLAW_TIMEOUT_S` -- `VIBEMOUSE_OPENCLAW_RETRIES` - -Buttons/state: -- `VIBEMOUSE_FRONT_BUTTON` -- `VIBEMOUSE_REAR_BUTTON` -- `VIBEMOUSE_ENTER_MODE` - -ASR performance: -- `VIBEMOUSE_BACKEND` -- `VIBEMOUSE_DEVICE` -- `VIBEMOUSE_PREWARM_ON_START` - -Gesture path: -- `VIBEMOUSE_GESTURES_ENABLED` -- `VIBEMOUSE_GESTURE_TRIGGER_BUTTON` -- `VIBEMOUSE_GESTURE_THRESHOLD_PX` -- `VIBEMOUSE_GESTURE_FREEZE_POINTER` - -## 8) Adaptation Checklist for New Platform - -When adding Windows/macOS support: - -1. Add platform class in `system_integration.py`. -2. Implement shortcut send + active window + focus probe with native APIs. -3. Define terminal detection hints and paste shortcut strategy. -4. Keep fallback chain intact in `output.py`. -5. Verify rear-button state machine in `app.py` unchanged. -6. Add tests in: - - `tests/test_system_integration.py` - - `tests/test_output.py` - - `tests/test_app.py` -7. 
Run full verification: -```bash -python -m compileall vibemouse -python -m unittest discover -s tests -p "test_*.py" -vibemouse doctor -``` - -## 9) Regression Gates (Must Pass Before Merge) - -- No change to front/rear state semantics -- OpenClaw dispatch keeps fallback path -- Doctor command still reports useful failures/warnings -- Existing tests pass; new platform tests added -- No destructive change to Linux Hyprland path - -## 10) Prompt Template for AI Assistants - -Use this prompt when asking an AI assistant to adapt VibeMouse: - -```text -You are adapting VibeMouse to . - -Constraints: -1) Preserve button state machine: - - front: start/stop recording - - rear idle: Enter - - rear recording: OpenClaw dispatch -2) Implement platform logic only via system_integration.py first. -3) Preserve fallback behavior (clipboard fallback on OpenClaw spawn failure). -4) Add/adjust tests in test_system_integration.py, test_output.py, test_app.py. -5) Run compileall + full unit tests + vibemouse doctor and report results. - -Deliver: -- code changes -- test changes -- verification evidence -- known platform-specific limitations -``` - -This keeps adaptation focused, testable, and safe for daily usage. 
diff --git a/docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md b/docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md deleted file mode 100644 index 158a7dc..0000000 --- a/docs/AI_ASSISTANT_DEPLOYMENT.zh-CN.md +++ /dev/null @@ -1,209 +0,0 @@ -# VibeMouse AI 助手部署与平台适配指南 - -本指南面向 AI 助手(以及使用 AI 助手的开发者),用于在新机器部署 VibeMouse,并安全地做新平台适配。 - -如果要做 Windows/macOS 适配,或者接入自定义桌面环境,请以本文件为执行基线。 - -## 1)项目目标与不可破坏行为 - -VibeMouse 是“鼠标侧键语音工作流”工具。 - -必须保持的行为: -- 前侧键:开始/结束录音 -- 空闲态后侧键:发送 Enter -- 录音态后侧键:停止录音并将转写发送到 OpenClaw -- 任何失败都必须有可见回退,不能静默丢字 - -做适配时,禁止破坏上述状态机。 - -## 2)核心架构地图 - -关键模块: -- `vibemouse/main.py`:CLI 入口(`run`、`doctor`) -- `vibemouse/app.py`:主状态机、线程编排、输出路由 -- `vibemouse/mouse_listener.py`:侧键监听与手势路径 -- `vibemouse/audio.py`:录音 -- `vibemouse/transcriber.py`:ASR 后端与识别 -- `vibemouse/output.py`:输入/剪贴板/OpenClaw 路由与回退 -- `vibemouse/system_integration.py`:平台适配边界 -- `vibemouse/doctor.py`:部署与运行自检 -- `vibemouse/config.py`:环境变量配置契约 - -## 3)平台适配边界(最重要) - -适配新平台时,优先扩展 `vibemouse/system_integration.py`,不要把平台特化逻辑散落到 `app.py` / `output.py`。 - -运行时依赖的方法: -- `is_hyprland` -- `send_shortcut(mod, key)` -- `active_window()` -- `cursor_position()` -- `move_cursor(x, y)` -- `switch_workspace(direction)` -- `is_text_input_focused()` -- `send_enter_via_accessibility()` -- `is_terminal_window_active()` -- `paste_shortcuts(terminal_active)` - -## 4)依赖项与下载地址 - -### 基础环境 -- Python 3.10+:https://www.python.org/downloads/ -- pip 安装文档:https://pip.pypa.io/en/stable/installation/ - -### 音频链路 -- PortAudio:http://www.portaudio.com/download.html -- libsndfile:https://github.com/libsndfile/libsndfile -- `sounddevice`:https://pypi.org/project/sounddevice/ -- `soundfile`:https://pypi.org/project/soundfile/ - -### 输入与桌面集成 -- `pynput`:https://pypi.org/project/pynput/ -- `evdev`(Linux):https://python-evdev.readthedocs.io/en/latest/ -- PyGObject / AT-SPI:https://pygobject.gnome.org/ - -### 语音识别与模型栈 -- FunASR:https://pypi.org/project/funasr/ -- FunASR ONNX:https://pypi.org/project/funasr-onnx/ -- ONNX Runtime:https://pypi.org/project/onnxruntime/ 
-- OpenVINO:https://pypi.org/project/openvino/ -- ModelScope:https://pypi.org/project/modelscope/ - -### OpenClaw 目标 -- OpenClaw 仓库:https://github.com/openclaw/openclaw - -Python 依赖版本以 `pyproject.toml` 为准。 - -## 5)部署步骤(可直接让 AI 助手执行) - -最快部署路径(推荐): - -```bash -bash scripts/auto-deploy.sh --preset stable -``` - -预设可选:`stable`、`fast`、`low-resource`。 - -也可以直接用 deploy 子命令: - -```bash -vibemouse deploy --preset stable -``` - -1. 安装项目 -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -U pip -pip install -e . -``` - -2. 先跑自检 -```bash -vibemouse doctor -vibemouse doctor --fix -``` - -3. 验证 OpenClaw -```bash -openclaw agent --agent main --message "ping" --json -``` - -4. 启动 -```bash -vibemouse -``` - -5. 手工验证状态矩阵 -- 空闲态后侧键 -> Enter -- 录音态后侧键 -> OpenClaw 路由 - -## 6)Linux user service 部署 - -推荐 service 文件路径: -- `~/.config/systemd/user/vibemouse.service` - -基础命令: -```bash -systemctl --user daemon-reload -systemctl --user enable --now vibemouse.service -systemctl --user status vibemouse.service -``` - -## 7)环境变量契约(关键) - -OpenClaw: -- `VIBEMOUSE_OPENCLAW_COMMAND` -- `VIBEMOUSE_OPENCLAW_AGENT` -- `VIBEMOUSE_OPENCLAW_TIMEOUT_S` -- `VIBEMOUSE_OPENCLAW_RETRIES` - -按钮与状态: -- `VIBEMOUSE_FRONT_BUTTON` -- `VIBEMOUSE_REAR_BUTTON` -- `VIBEMOUSE_ENTER_MODE` - -识别性能: -- `VIBEMOUSE_BACKEND` -- `VIBEMOUSE_DEVICE` -- `VIBEMOUSE_PREWARM_ON_START` - -手势: -- `VIBEMOUSE_GESTURES_ENABLED` -- `VIBEMOUSE_GESTURE_TRIGGER_BUTTON` -- `VIBEMOUSE_GESTURE_THRESHOLD_PX` -- `VIBEMOUSE_GESTURE_FREEZE_POINTER` - -## 8)新平台适配检查单 - -新增 Windows/macOS 支持时: - -1. 在 `system_integration.py` 增加平台类。 -2. 用本地 API 实现快捷键发送、活动窗口检测、焦点探测。 -3. 定义终端识别与粘贴策略。 -4. 保留 `output.py` 的回退链路。 -5. 保证 `app.py` 按键状态机不变。 -6. 补充测试: - - `tests/test_system_integration.py` - - `tests/test_output.py` - - `tests/test_app.py` -7. 
跑完整验证: -```bash -python -m compileall vibemouse -python -m unittest discover -s tests -p "test_*.py" -vibemouse doctor -``` - -## 9)合并前回归门槛 - -- 前/后侧键状态语义不变 -- OpenClaw 路由仍有失败回退 -- doctor 输出对故障可读、可定位 -- 全量测试通过,并补充平台测试 -- Linux Hyprland 主路径不能退化 - -## 10)给 AI 助手的提示模板 - -可以直接把下面提示词交给 AI 助手: - -```text -你要把 VibeMouse 适配到 <目标平台>。 - -约束: -1)必须保持按钮状态机: - - 前侧键:开始/结束录音 - - 后侧键空闲态:Enter - - 后侧键录音态:OpenClaw 路由 -2)平台逻辑优先放在 system_integration.py,不要散落到其它模块。 -3)必须保留失败回退(OpenClaw 启动失败回退到剪贴板)。 -4)更新 test_system_integration.py、test_output.py、test_app.py。 -5)执行 compileall + 全量单测 + vibemouse doctor,并报告结果。 - -交付: -- 代码改动 -- 测试改动 -- 验证证据 -- 平台限制说明 -``` - -这能保证适配过程可控、可测、可长期维护。 diff --git a/openclaw-stt-plugin/index.js b/openclaw-stt-plugin/index.js new file mode 100644 index 0000000..841fb15 --- /dev/null +++ b/openclaw-stt-plugin/index.js @@ -0,0 +1,131 @@ +import fs from "node:fs"; +import path from "node:path"; +import { spawnSync } from "node:child_process"; +import { fileURLToPath } from "node:url"; + +const pluginRoot = path.dirname(fileURLToPath(import.meta.url)); +const defaultScriptPath = path.join(pluginRoot, "stt_cli.py"); + +function parseJson(text) { + const trimmed = String(text ?? "").trim(); + if (!trimmed) return null; + try { + return JSON.parse(trimmed); + } catch { + const lines = trimmed.split(/\r?\n/).filter(Boolean); + if (lines.length === 0) return null; + try { + return JSON.parse(lines[lines.length - 1]); + } catch { + return null; + } + } +} + +function createSttTool(api) { + return { + name: "stt_transcribe", + label: "STT Transcribe", + description: "Transcribe local audio to text. If audio_path is missing, record from mic first.", + parameters: { + type: "object", + additionalProperties: false, + properties: { + audio_path: { type: "string", description: "Absolute local audio file path." 
}, + record_seconds: { type: "number", minimum: 0.3, maximum: 120 }, + model: { type: "string" }, + device: { type: "string" }, + language: { type: "string" }, + use_itn: { type: "boolean" } + } + }, + async execute(_id, params) { + const cfg = api.pluginConfig ?? {}; + const pythonBin = (cfg.pythonBin || process.env.OPENCLAW_STT_PYTHON || "python3").trim(); + const scriptPath = path.resolve((cfg.scriptPath || defaultScriptPath).trim()); + + if (!pythonBin) { + throw new Error("openclaw-stt: pythonBin is empty"); + } + if (!fs.existsSync(scriptPath)) { + throw new Error(`openclaw-stt: stt script not found: ${scriptPath}`); + } + + const args = [scriptPath, "--json"]; + const audioPathRaw = typeof params.audio_path === "string" ? params.audio_path.trim() : ""; + const audioPath = audioPathRaw ? path.resolve(audioPathRaw) : ""; + if (audioPath) { + args.push("--audio-path", audioPath); + } else { + const seconds = + typeof params.record_seconds === "number" && Number.isFinite(params.record_seconds) + ? params.record_seconds + : 5.0; + args.push("--record-seconds", String(seconds)); + } + + const model = + (typeof params.model === "string" && params.model.trim()) || + (typeof cfg.defaultModel === "string" && cfg.defaultModel.trim()) || + "iic/SenseVoiceSmall"; + const device = + (typeof params.device === "string" && params.device.trim()) || + (typeof cfg.defaultDevice === "string" && cfg.defaultDevice.trim()) || + "cpu"; + const language = + (typeof params.language === "string" && params.language.trim()) || + (typeof cfg.defaultLanguage === "string" && cfg.defaultLanguage.trim()) || + "auto"; + const useItn = + typeof params.use_itn === "boolean" + ? params.use_itn + : typeof cfg.defaultUseItn === "boolean" + ? cfg.defaultUseItn + : true; + + args.push("--model", model, "--device", device, "--language", language); + if (useItn) { + args.push("--use-itn"); + } + + const timeoutMs = + typeof cfg.timeoutMs === "number" && Number.isFinite(cfg.timeoutMs) + ? 
Math.max(1000, Math.min(300000, Math.floor(cfg.timeoutMs))) + : 120000; + + const proc = spawnSync(pythonBin, args, { + encoding: "utf8", + timeout: timeoutMs, + maxBuffer: 1024 * 1024 * 8, + }); + + if (proc.error) { + throw new Error(`openclaw-stt spawn failed: ${proc.error.message}`); + } + + const payload = parseJson(proc.stdout); + const stderr = String(proc.stderr ?? "").trim(); + if (proc.status !== 0) { + const reason = payload?.error || stderr || `exit status ${proc.status}`; + throw new Error(`openclaw-stt failed: ${reason}`); + } + + const text = typeof payload?.text === "string" ? payload.text : ""; + if (!text.trim()) { + return { + content: [{ type: "text", text: "(STT completed, but no speech recognized)" }], + details: { text: "", raw: payload ?? null }, + }; + } + + return { + content: [{ type: "text", text }], + details: { text, raw: payload ?? null }, + }; + }, + }; +} + +export default function register(api) { + api.registerTool(createSttTool(api)); +} diff --git a/openclaw-stt-plugin/install_local.sh b/openclaw-stt-plugin/install_local.sh new file mode 100644 index 0000000..0a5252f --- /dev/null +++ b/openclaw-stt-plugin/install_local.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_DIR="$(cd "$ROOT_DIR/.." 
&& pwd)" +PY_BIN_DEFAULT="$REPO_DIR/.venv/bin/python" + +if [[ -x "$PY_BIN_DEFAULT" ]]; then + PY_BIN="$PY_BIN_DEFAULT" +else + PY_BIN="python3" +fi + +"$PY_BIN" -m pip install -U pip +"$PY_BIN" -m pip install -r "$ROOT_DIR/requirements.txt" + +openclaw plugins install -l "$ROOT_DIR" +openclaw plugins enable openclaw-stt +openclaw config set plugins.entries.openclaw-stt.config.pythonBin "$PY_BIN" +openclaw config set plugins.entries.openclaw-stt.config.scriptPath "$ROOT_DIR/stt_cli.py" +openclaw plugins doctor + +echo "Installed openclaw-stt plugin with python: $PY_BIN" diff --git a/openclaw-stt-plugin/openclaw.plugin.json b/openclaw-stt-plugin/openclaw.plugin.json new file mode 100644 index 0000000..065c326 --- /dev/null +++ b/openclaw-stt-plugin/openclaw.plugin.json @@ -0,0 +1,27 @@ +{ + "id": "openclaw-stt", + "name": "OpenClaw STT", + "description": "Local speech-to-text tool using SenseVoice via Python.", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "pythonBin": { "type": "string" }, + "scriptPath": { "type": "string" }, + "defaultModel": { "type": "string" }, + "defaultDevice": { "type": "string" }, + "defaultLanguage": { "type": "string" }, + "defaultUseItn": { "type": "boolean" }, + "timeoutMs": { "type": "integer", "minimum": 1000, "maximum": 300000 } + } + }, + "uiHints": { + "pythonBin": { "label": "Python Bin" }, + "scriptPath": { "label": "STT Script Path" }, + "defaultModel": { "label": "Default Model" }, + "defaultDevice": { "label": "Default Device" }, + "defaultLanguage": { "label": "Default Language" }, + "defaultUseItn": { "label": "Use ITN by Default" }, + "timeoutMs": { "label": "Timeout (ms)" } + } +} diff --git a/openclaw-stt-plugin/package.json b/openclaw-stt-plugin/package.json new file mode 100644 index 0000000..7a84def --- /dev/null +++ b/openclaw-stt-plugin/package.json @@ -0,0 +1,12 @@ +{ + "name": "openclaw-stt", + "version": "0.1.0", + "private": true, + "type": "module", + 
"description": "OpenClaw local STT plugin", + "openclaw": { + "extensions": [ + "./index.js" + ] + } +} diff --git a/openclaw-stt-plugin/requirements.txt b/openclaw-stt-plugin/requirements.txt new file mode 100644 index 0000000..f1e621e --- /dev/null +++ b/openclaw-stt-plugin/requirements.txt @@ -0,0 +1,6 @@ +funasr +numpy +sounddevice +soundfile +torch +torchaudio diff --git a/openclaw-stt-plugin/stt_cli.py b/openclaw-stt-plugin/stt_cli.py new file mode 100644 index 0000000..6f74fa0 --- /dev/null +++ b/openclaw-stt-plugin/stt_cli.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import tempfile +import time +from pathlib import Path +from typing import Any + + +def _emit_json(payload: dict[str, Any]) -> None: + print(json.dumps(payload, ensure_ascii=False)) + + +def _build_model(*, model: str, device: str, use_vad: bool, trust_remote_code: bool): + try: + from funasr import AutoModel + except Exception as error: # pragma: no cover + raise RuntimeError( + f"FunASR import failed: {error}. Please install requirements.txt" + ) from error + + kwargs: dict[str, Any] = { + "model": model, + "device": device, + "trust_remote_code": trust_remote_code, + "disable_update": True, + } + if use_vad: + kwargs["vad_model"] = "fsmn-vad" + kwargs["vad_kwargs"] = {"max_single_segment_time": 30000} + + return AutoModel(**kwargs) + + +def _transcribe_audio( + *, + audio_path: Path, + model_name: str, + device: str, + language: str, + use_itn: bool, + use_vad: bool, + trust_remote_code: bool, +) -> tuple[str, str]: + try: + from funasr.utils.postprocess_utils import rich_transcription_postprocess + except Exception as error: # pragma: no cover + raise RuntimeError( + f"FunASR postprocess import failed: {error}. 
Please install requirements.txt" + ) from error + + model = None + device_in_use = device + primary_error: Exception | None = None + try: + model = _build_model( + model=model_name, + device=device, + use_vad=use_vad, + trust_remote_code=trust_remote_code, + ) + except Exception as error: + primary_error = error + if device.strip().lower() != "cpu": + model = _build_model( + model=model_name, + device="cpu", + use_vad=use_vad, + trust_remote_code=trust_remote_code, + ) + device_in_use = "cpu" + else: + raise + + if model is None: + raise RuntimeError(f"Failed to load model on {device}: {primary_error}") + + result = model.generate( + input=str(audio_path), + cache={}, + language=language, + use_itn=use_itn, + merge_vad=True, + merge_length_s=15, + batch_size_s=60, + ) + if not result: + return "", device_in_use + + raw_text = result[0].get("text", "") + if not isinstance(raw_text, str): + return "", device_in_use + + return rich_transcription_postprocess(raw_text).strip(), device_in_use + + +def _record_to_wav(*, seconds: float, sample_rate: int, channels: int) -> tuple[Path, float]: + try: + import sounddevice as sd + import soundfile as sf + except Exception as error: # pragma: no cover + raise RuntimeError( + f"Audio dependencies missing: {error}. 
Please install requirements.txt" + ) from error + + frame_count = max(1, int(seconds * sample_rate)) + with tempfile.NamedTemporaryFile(prefix="openclaw_stt_", suffix=".wav", delete=False) as tmp: + out_path = Path(tmp.name) + + start = time.monotonic() + audio = sd.rec( + frame_count, + samplerate=sample_rate, + channels=channels, + dtype="float32", + ) + sd.wait() + duration_s = time.monotonic() - start + sf.write(str(out_path), audio, sample_rate) + return out_path, duration_s + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="openclaw-stt-cli") + parser.add_argument("--audio-path", type=str, default="") + parser.add_argument("--record-seconds", type=float, default=5.0) + parser.add_argument("--sample-rate", type=int, default=16000) + parser.add_argument("--channels", type=int, default=1) + parser.add_argument("--model", type=str, default="iic/SenseVoiceSmall") + parser.add_argument("--device", type=str, default="cpu") + parser.add_argument("--language", type=str, default="auto") + parser.add_argument("--use-itn", action="store_true") + parser.add_argument("--use-vad", action="store_true") + parser.add_argument("--trust-remote-code", action="store_true") + parser.add_argument("--json", action="store_true") + return parser + + +def main() -> int: + parser = _build_parser() + args = parser.parse_args() + + record_seconds = max(0.3, float(args.record_seconds)) + sample_rate = max(8000, int(args.sample_rate)) + channels = max(1, int(args.channels)) + + temp_audio = False + audio_path = Path(args.audio_path).expanduser().resolve() if args.audio_path else None + if audio_path is None: + try: + audio_path, recorded_duration = _record_to_wav( + seconds=record_seconds, + sample_rate=sample_rate, + channels=channels, + ) + except Exception as error: + if args.json: + _emit_json({"ok": False, "error": str(error)}) + else: + print(f"error: {error}") + return 1 + temp_audio = True + else: + recorded_duration = None + if not 
audio_path.exists(): + message = f"audio file not found: {audio_path}" + if args.json: + _emit_json({"ok": False, "error": message}) + else: + print(f"error: {message}") + return 1 + + try: + text, device_in_use = _transcribe_audio( + audio_path=audio_path, + model_name=args.model, + device=args.device, + language=args.language, + use_itn=bool(args.use_itn), + use_vad=bool(args.use_vad), + trust_remote_code=bool(args.trust_remote_code), + ) + except Exception as error: + if args.json: + _emit_json({"ok": False, "error": str(error)}) + else: + print(f"error: {error}") + return_code = 1 + else: + payload = { + "ok": True, + "text": text, + "audio_path": str(audio_path), + "device_in_use": device_in_use, + "recorded_duration_s": recorded_duration, + } + if args.json: + _emit_json(payload) + else: + print(text) + return_code = 0 + finally: + if temp_audio and audio_path is not None: + try: + audio_path.unlink(missing_ok=True) + except Exception: + pass + + return return_code + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 7387257..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = ["setuptools>=68", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "vibemouse" -version = "0.1.0" -description = "Mouse side-button voice dictation with SenseVoice" -readme = "README.md" -requires-python = ">=3.10" -license = { file = "LICENSE" } -classifiers = [ - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3", -] -authors = [{ name = "VibeMouse" }] -dependencies = [ - "numpy>=1.24,<2", - "sounddevice>=0.4.6", - "soundfile>=0.12.1", - "pynput>=1.7.7", - "evdev>=1.7.1", - "PyGObject>=3.46.0", - "pyperclip>=1.9.0", - "funasr>=1.2.6", - "funasr-onnx>=0.4.1", - "onnxruntime>=1.24.0", - "openvino>=2026.0.0", - "modelscope>=1.18.0", -] - -[project.scripts] -vibemouse = "vibemouse.main:main" - 
-[tool.setuptools] -package-dir = {"" = "."} - -[tool.setuptools.packages.find] -where = ["."] -include = ["vibemouse*"] diff --git a/scripts/auto-deploy.sh b/scripts/auto-deploy.sh deleted file mode 100644 index 1dd2e6a..0000000 --- a/scripts/auto-deploy.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" - -cd "${REPO_ROOT}" - -if [[ ! -d ".venv" ]]; then - python3 -m venv .venv -fi - -source "${REPO_ROOT}/.venv/bin/activate" - -pip install -U pip -pip install -e . - -vibemouse deploy "$@" diff --git a/scripts/qwen3_ov_npu_bounds.py b/scripts/qwen3_ov_npu_bounds.py deleted file mode 100644 index 050019f..0000000 --- a/scripts/qwen3_ov_npu_bounds.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import annotations - -from pathlib import Path - -import openvino as ov -from openvino import Dimension, PartialShape - - -def build_bounded_models(src_dir: Path, dst_dir: Path) -> None: - core = ov.Core() - dst_dir.mkdir(parents=True, exist_ok=True) - - audio = core.read_model(str(src_dir / "openvino_thinker_audio_model.xml")) - audio.reshape({"padded_feature": PartialShape([1, 128, Dimension(80, 2048)])}) - ov.save_model(audio, str(dst_dir / "openvino_thinker_audio_model.xml")) - - audio_encoder = core.read_model( - str(src_dir / "openvino_thinker_audio_encoder_model.xml") - ) - audio_encoder.reshape( - { - "hidden_states": PartialShape([Dimension(64, 4096), 896]), - "cu_seqlens": PartialShape([Dimension(2, 512)]), - } - ) - ov.save_model( - audio_encoder, str(dst_dir / "openvino_thinker_audio_encoder_model.xml") - ) - - embedding = core.read_model(str(src_dir / "openvino_thinker_embedding_model.xml")) - embedding.reshape({"input": PartialShape([1, Dimension(1, 2048)])}) - ov.save_model(embedding, str(dst_dir / "openvino_thinker_embedding_model.xml")) - - language = core.read_model(str(src_dir / "openvino_thinker_language_model.xml")) - 
language.reshape( - { - "attention_mask": PartialShape([1, Dimension(1, 2048)]), - "position_ids": PartialShape([3, 1, Dimension(1, 2048)]), - "inputs_embeds": PartialShape([1, Dimension(1, 2048), 1024]), - "beam_idx": PartialShape([1]), - } - ) - ov.save_model(language, str(dst_dir / "openvino_thinker_language_model.xml")) - - -def check_npu_compile(model_dir: Path) -> list[tuple[str, bool, str]]: - core = ov.Core() - files = [ - "openvino_thinker_audio_model.xml", - "openvino_thinker_audio_encoder_model.xml", - "openvino_thinker_embedding_model.xml", - "openvino_thinker_language_model.xml", - ] - result: list[tuple[str, bool, str]] = [] - for name in files: - path = str(model_dir / name) - try: - compiled = core.compile_model(path, "NPU") - exec_devs = str(compiled.get_property("EXECUTION_DEVICES")) - result.append((name, True, exec_devs)) - except Exception as error: - result.append((name, False, str(error).splitlines()[0])) - return result - - -def main() -> None: - src = Path("tmp/qwen3_ov_model/thinker") - dst = Path("tmp/qwen3_ov_model_static/thinker") - build_bounded_models(src, dst) - print("Bounded models saved to", dst) - for name, ok, message in check_npu_compile(dst): - print(name, "NPU_COMPILE_OK" if ok else "NPU_COMPILE_FAIL", message) - - -if __name__ == "__main__": - main() diff --git a/tests/test_app.py b/tests/test_app.py deleted file mode 100644 index fe5edc5..0000000 --- a/tests/test_app.py +++ /dev/null @@ -1,418 +0,0 @@ -from __future__ import annotations - -import json -import subprocess -import tempfile -import threading -import unittest -from collections.abc import Callable -from pathlib import Path -from types import SimpleNamespace -from typing import cast -from unittest.mock import patch - -from vibemouse.app import VoiceMouseApp - - -class VoiceMouseAppWorkspaceTests(unittest.TestCase): - @staticmethod - def _make_subject() -> VoiceMouseApp: - return object.__new__(VoiceMouseApp) - - def 
test_switch_workspace_left_uses_expected_dispatcher(self) -> None: - subject = self._make_subject() - switch = cast(Callable[[str], bool], getattr(subject, "_switch_workspace")) - - with patch( - "vibemouse.app.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock: - ok = switch("left") - - self.assertTrue(ok) - self.assertEqual( - run_mock.call_args.args[0], - ["hyprctl", "dispatch", "workspace", "e-1"], - ) - - def test_switch_workspace_right_uses_expected_dispatcher(self) -> None: - subject = self._make_subject() - switch = cast(Callable[[str], bool], getattr(subject, "_switch_workspace")) - - with patch( - "vibemouse.app.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock: - ok = switch("right") - - self.assertTrue(ok) - self.assertEqual( - run_mock.call_args.args[0], - ["hyprctl", "dispatch", "workspace", "e+1"], - ) - - def test_switch_workspace_returns_false_when_process_errors(self) -> None: - subject = self._make_subject() - switch = cast(Callable[[str], bool], getattr(subject, "_switch_workspace")) - - with patch( - "vibemouse.app.subprocess.run", - side_effect=subprocess.TimeoutExpired(cmd=["hyprctl"], timeout=1.0), - ): - ok = switch("left") - - self.assertFalse(ok) - - def test_set_recording_status_writes_recording_payload(self) -> None: - subject = self._make_subject() - with tempfile.TemporaryDirectory(prefix="vibemouse-status-") as tmp: - status_file = Path(tmp) / "status.json" - setattr(subject, "_config", SimpleNamespace(status_file=status_file)) - - set_status = cast( - Callable[[bool], None], - getattr(subject, "_set_recording_status"), - ) - set_status(True) - - payload = cast( - dict[str, object], - json.loads(status_file.read_text(encoding="utf-8")), - ) - self.assertEqual(payload, {"recording": True, "state": "recording"}) - - def test_set_recording_status_writes_idle_payload(self) -> None: - subject = self._make_subject() - with 
tempfile.TemporaryDirectory(prefix="vibemouse-status-") as tmp: - status_file = Path(tmp) / "status.json" - setattr(subject, "_config", SimpleNamespace(status_file=status_file)) - - set_status = cast( - Callable[[bool], None], - getattr(subject, "_set_recording_status"), - ) - set_status(False) - - payload = cast( - dict[str, object], - json.loads(status_file.read_text(encoding="utf-8")), - ) - self.assertEqual(payload, {"recording": False, "state": "idle"}) - - -class VoiceMouseAppButtonBehaviorTests(unittest.TestCase): - @staticmethod - def _make_subject() -> VoiceMouseApp: - return object.__new__(VoiceMouseApp) - - def test_front_press_stops_recording_with_default_output_target(self) -> None: - subject = self._make_subject() - recording = SimpleNamespace(duration_s=1.1, path=Path("/tmp/voice.wav")) - setattr( - subject, - "_recorder", - SimpleNamespace(is_recording=True, stop_and_save=lambda: recording), - ) - - status_values: list[bool] = [] - worker_calls: list[tuple[object, str]] = [] - setattr( - subject, "_set_recording_status", lambda value: status_values.append(value) - ) - setattr( - subject, - "_start_transcription_worker", - lambda rec, *, output_target: worker_calls.append((rec, output_target)), - ) - - on_front = cast(Callable[[], None], getattr(subject, "_on_front_press")) - on_front() - - self.assertEqual(status_values, [False]) - self.assertEqual(worker_calls, [(recording, "default")]) - - def test_rear_press_stops_recording_and_routes_to_openclaw(self) -> None: - subject = self._make_subject() - recording = SimpleNamespace(duration_s=1.2, path=Path("/tmp/voice.wav")) - setattr( - subject, - "_recorder", - SimpleNamespace(is_recording=True, stop_and_save=lambda: recording), - ) - - status_values: list[bool] = [] - worker_calls: list[tuple[object, str]] = [] - send_enter_calls: list[str] = [] - setattr( - subject, "_set_recording_status", lambda value: status_values.append(value) - ) - setattr( - subject, - "_start_transcription_worker", - lambda 
rec, *, output_target: worker_calls.append((rec, output_target)), - ) - setattr( - subject, - "_output", - SimpleNamespace(send_enter=lambda mode: send_enter_calls.append(mode)), - ) - setattr(subject, "_config", SimpleNamespace(enter_mode="enter")) - - on_rear = cast(Callable[[], None], getattr(subject, "_on_rear_press")) - on_rear() - - self.assertEqual(status_values, [False]) - self.assertEqual(worker_calls, [(recording, "openclaw")]) - self.assertEqual(send_enter_calls, []) - - def test_rear_press_sends_enter_when_idle(self) -> None: - subject = self._make_subject() - setattr(subject, "_recorder", SimpleNamespace(is_recording=False)) - send_enter_calls: list[str] = [] - setattr( - subject, - "_output", - SimpleNamespace(send_enter=lambda mode: send_enter_calls.append(mode)), - ) - setattr(subject, "_config", SimpleNamespace(enter_mode="ctrl_enter")) - - on_rear = cast(Callable[[], None], getattr(subject, "_on_rear_press")) - on_rear() - - self.assertEqual(send_enter_calls, ["ctrl_enter"]) - - def test_rear_button_state_matrix(self) -> None: - for is_recording in (True, False): - with self.subTest(is_recording=is_recording): - subject = self._make_subject() - recording = SimpleNamespace( - duration_s=0.8, path=Path("/tmp/matrix.wav") - ) - setattr( - subject, - "_recorder", - SimpleNamespace( - is_recording=is_recording, - stop_and_save=lambda: recording, - ), - ) - setattr(subject, "_set_recording_status", lambda value: None) - - worker_calls: list[tuple[object, str]] = [] - send_enter_calls: list[str] = [] - setattr( - subject, - "_start_transcription_worker", - lambda rec, *, output_target: worker_calls.append( - (rec, output_target) - ), - ) - setattr( - subject, - "_output", - SimpleNamespace( - send_enter=lambda mode: send_enter_calls.append(mode) - ), - ) - setattr(subject, "_config", SimpleNamespace(enter_mode="enter")) - - on_rear = cast(Callable[[], None], getattr(subject, "_on_rear_press")) - on_rear() - - if is_recording: - 
self.assertEqual(worker_calls, [(recording, "openclaw")]) - self.assertEqual(send_enter_calls, []) - else: - self.assertEqual(worker_calls, []) - self.assertEqual(send_enter_calls, ["enter"]) - - def test_transcribe_and_output_openclaw_uses_openclaw_sender(self) -> None: - subject = self._make_subject() - recording = SimpleNamespace(duration_s=1.0, path=Path("/tmp/transcribe.wav")) - setattr( - subject, - "_transcriber", - SimpleNamespace( - transcribe=lambda path: "hello world", - device_in_use="cpu", - backend_in_use="funasr", - ), - ) - - openclaw_calls: list[str] = [] - inject_calls: list[tuple[str, bool]] = [] - setattr( - subject, - "_output", - SimpleNamespace( - send_to_openclaw_result=lambda text: openclaw_calls.append(text) - or SimpleNamespace(route="openclaw", reason="dispatched"), - inject_or_clipboard=lambda text, auto_paste: inject_calls.append( - (text, auto_paste) - ) - or "typed", - ), - ) - setattr(subject, "_config", SimpleNamespace(auto_paste=True)) - setattr(subject, "_transcribe_lock", threading.Lock()) - setattr(subject, "_workers_lock", threading.Lock()) - setattr(subject, "_workers", set()) - - removed_paths: list[Path] = [] - setattr(subject, "_safe_unlink", lambda path: removed_paths.append(path)) - - transcribe_and_output = cast( - Callable[[object, str], None], - getattr(subject, "_transcribe_and_output"), - ) - transcribe_and_output(recording, "openclaw") - - self.assertEqual(openclaw_calls, ["hello world"]) - self.assertEqual(inject_calls, []) - self.assertEqual(removed_paths, [Path("/tmp/transcribe.wav")]) - - -class VoiceMouseAppPrewarmTests(unittest.TestCase): - @staticmethod - def _make_subject() -> VoiceMouseApp: - return object.__new__(VoiceMouseApp) - - def test_maybe_prewarm_starts_worker_with_configured_delay(self) -> None: - subject = self._make_subject() - setattr( - subject, - "_config", - SimpleNamespace(prewarm_on_start=True, prewarm_delay_s=2.5), - ) - setattr(subject, "_prewarm_started", False) - - with 
patch("vibemouse.app.threading.Thread") as thread_cls: - maybe_prewarm = cast( - Callable[[], None], - getattr(subject, "_maybe_prewarm_transcriber"), - ) - maybe_prewarm() - - self.assertTrue(getattr(subject, "_prewarm_started")) - thread_cls.assert_called_once() - thread_kwargs = thread_cls.call_args.kwargs - self.assertEqual(thread_kwargs["args"], (2.5,)) - self.assertTrue(thread_kwargs["daemon"]) - target = thread_kwargs["target"] - self.assertIs(getattr(target, "__self__", None), subject) - self.assertIs( - getattr(target, "__func__", None), - getattr(VoiceMouseApp, "_prewarm_transcriber"), - ) - thread_cls.return_value.start.assert_called_once_with() - - def test_maybe_prewarm_skips_when_disabled(self) -> None: - subject = self._make_subject() - setattr( - subject, - "_config", - SimpleNamespace(prewarm_on_start=False, prewarm_delay_s=2.0), - ) - setattr(subject, "_prewarm_started", False) - - with patch("vibemouse.app.threading.Thread") as thread_cls: - maybe_prewarm = cast( - Callable[[], None], - getattr(subject, "_maybe_prewarm_transcriber"), - ) - maybe_prewarm() - - self.assertFalse(getattr(subject, "_prewarm_started")) - thread_cls.assert_not_called() - - def test_maybe_prewarm_skips_when_already_started(self) -> None: - subject = self._make_subject() - setattr( - subject, - "_config", - SimpleNamespace(prewarm_on_start=True, prewarm_delay_s=2.0), - ) - setattr(subject, "_prewarm_started", True) - - with patch("vibemouse.app.threading.Thread") as thread_cls: - maybe_prewarm = cast( - Callable[[], None], - getattr(subject, "_maybe_prewarm_transcriber"), - ) - maybe_prewarm() - - thread_cls.assert_not_called() - - def test_prewarm_transcriber_waits_before_warmup(self) -> None: - subject = self._make_subject() - wait_calls: list[float] = [] - prewarm_calls: list[bool] = [] - setattr( - subject, - "_stop_event", - SimpleNamespace(wait=lambda timeout: wait_calls.append(timeout) or False), - ) - setattr( - subject, - "_transcriber", - 
SimpleNamespace(prewarm=lambda: prewarm_calls.append(True)), - ) - - prewarm = cast( - Callable[[float], None], - getattr(subject, "_prewarm_transcriber"), - ) - prewarm(1.5) - - self.assertEqual(wait_calls, [1.5]) - self.assertEqual(prewarm_calls, [True]) - - def test_prewarm_transcriber_skips_when_stopped_during_delay(self) -> None: - subject = self._make_subject() - wait_calls: list[float] = [] - prewarm_calls: list[bool] = [] - setattr( - subject, - "_stop_event", - SimpleNamespace(wait=lambda timeout: wait_calls.append(timeout) or True), - ) - setattr( - subject, - "_transcriber", - SimpleNamespace(prewarm=lambda: prewarm_calls.append(True)), - ) - - prewarm = cast( - Callable[[float], None], - getattr(subject, "_prewarm_transcriber"), - ) - prewarm(2.0) - - self.assertEqual(wait_calls, [2.0]) - self.assertEqual(prewarm_calls, []) - - def test_prewarm_transcriber_without_delay_warms_immediately(self) -> None: - subject = self._make_subject() - prewarm_calls: list[bool] = [] - setattr( - subject, - "_stop_event", - SimpleNamespace( - wait=lambda timeout: (_ for _ in ()).throw( - AssertionError("wait should not be called when delay is zero") - ) - ), - ) - setattr( - subject, - "_transcriber", - SimpleNamespace(prewarm=lambda: prewarm_calls.append(True)), - ) - - prewarm = cast( - Callable[[float], None], - getattr(subject, "_prewarm_transcriber"), - ) - prewarm(0.0) - - self.assertEqual(prewarm_calls, [True]) diff --git a/tests/test_audio.py b/tests/test_audio.py deleted file mode 100644 index 6b1eac2..0000000 --- a/tests/test_audio.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import annotations - -import tempfile -import unittest -from pathlib import Path -from typing import final - -import numpy as np -from numpy.typing import NDArray - -from vibemouse.audio import AudioRecorder, AudioRecording - - -class _FakeSoundFile: - def __init__(self) -> None: - self.paths: list[Path] = [] - self.sample_rates: list[int] = [] - - def write( - self, file: str | 
Path, data: NDArray[np.float32], samplerate: int - ) -> None: - _ = data - self.paths.append(Path(file)) - self.sample_rates.append(samplerate) - - -@final -class _TestableAudioRecorder(AudioRecorder): - def set_soundfile(self, soundfile: _FakeSoundFile) -> None: - self._sf = soundfile - - def prime_recording(self, frame: NDArray[np.float32]) -> None: - with self._lock: - self._recording = True - self._stream = None - self._frames = [frame] - - -class AudioRecorderTests(unittest.TestCase): - @staticmethod - def _record_once( - recorder: _TestableAudioRecorder, frame: NDArray[np.float32] - ) -> AudioRecording: - recorder.prime_recording(frame) - recording = recorder.stop_and_save() - if recording is None: - raise AssertionError("Expected a recording to be produced") - return recording - - def test_each_recording_uses_unique_filename(self) -> None: - with tempfile.TemporaryDirectory(prefix="vibemouse-tests-") as tmp: - temp_dir = Path(tmp) - recorder = _TestableAudioRecorder( - sample_rate=16000, - channels=1, - dtype="float32", - temp_dir=temp_dir, - ) - soundfile = _FakeSoundFile() - recorder.set_soundfile(soundfile) - - frame = np.zeros((160, 1), dtype=np.float32) - first = self._record_once(recorder, frame) - second = self._record_once(recorder, frame) - - self.assertNotEqual(first.path, second.path) - self.assertTrue(first.path.name.startswith("recording_")) - self.assertTrue(second.path.name.startswith("recording_")) - self.assertEqual(first.path.suffix, ".wav") - self.assertEqual(second.path.suffix, ".wav") - self.assertEqual(soundfile.sample_rates, [16000, 16000]) diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index 696810b..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,297 +0,0 @@ -from __future__ import annotations - -import os -import unittest -from unittest.mock import patch - -from vibemouse.config import load_config - - -class LoadConfigTests(unittest.TestCase): - def 
test_defaults_disable_trust_remote_code(self) -> None: - with patch.dict(os.environ, {}, clear=True): - config = load_config() - - self.assertFalse(config.trust_remote_code) - self.assertFalse(config.auto_paste) - self.assertFalse(config.gestures_enabled) - self.assertEqual(config.gesture_trigger_button, "rear") - self.assertEqual(config.gesture_threshold_px, 120) - self.assertTrue(config.gesture_freeze_pointer) - self.assertTrue(config.gesture_restore_cursor) - self.assertEqual(config.gesture_up_action, "record_toggle") - self.assertEqual(config.gesture_down_action, "noop") - self.assertEqual(config.gesture_left_action, "noop") - self.assertEqual(config.gesture_right_action, "send_enter") - self.assertEqual(config.enter_mode, "enter") - self.assertEqual(config.button_debounce_ms, 150) - self.assertTrue(config.prewarm_on_start) - self.assertEqual(config.prewarm_delay_s, 0.0) - self.assertEqual(config.status_file.name, "vibemouse-status.json") - self.assertEqual(config.openclaw_command, "openclaw") - self.assertEqual(config.openclaw_agent, "main") - self.assertEqual(config.openclaw_timeout_s, 20.0) - self.assertEqual(config.openclaw_retries, 0) - self.assertEqual(config.front_button, "x1") - self.assertEqual(config.rear_button, "x2") - - def test_trust_remote_code_can_be_enabled(self) -> None: - with patch.dict( - os.environ, {"VIBEMOUSE_TRUST_REMOTE_CODE": "true"}, clear=True - ): - config = load_config() - - self.assertTrue(config.trust_remote_code) - - def test_auto_paste_can_be_enabled(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_AUTO_PASTE": "true"}, clear=True): - config = load_config() - - self.assertTrue(config.auto_paste) - - def test_gestures_can_be_enabled(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_GESTURES_ENABLED": "true"}, clear=True): - config = load_config() - - self.assertTrue(config.gestures_enabled) - - def test_gesture_freeze_pointer_can_be_disabled(self) -> None: - with patch.dict( - os.environ, - 
{"VIBEMOUSE_GESTURE_FREEZE_POINTER": "false"}, - clear=True, - ): - config = load_config() - - self.assertFalse(config.gesture_freeze_pointer) - - def test_gesture_restore_cursor_can_be_disabled(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_GESTURE_RESTORE_CURSOR": "false"}, - clear=True, - ): - config = load_config() - - self.assertFalse(config.gesture_restore_cursor) - - def test_prewarm_on_start_can_be_disabled(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_PREWARM_ON_START": "false"}, - clear=True, - ): - config = load_config() - - self.assertFalse(config.prewarm_on_start) - - def test_prewarm_delay_can_be_configured(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_PREWARM_DELAY_S": "2.5"}, - clear=True, - ): - config = load_config() - - self.assertEqual(config.prewarm_delay_s, 2.5) - - def test_negative_prewarm_delay_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_PREWARM_DELAY_S": "-0.1"}, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_PREWARM_DELAY_S must be a non-negative float", - ): - _ = load_config() - - def test_status_file_can_be_overridden(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_STATUS_FILE": "/tmp/custom-vibemouse-status.json"}, - clear=True, - ): - config = load_config() - - self.assertEqual(str(config.status_file), "/tmp/custom-vibemouse-status.json") - - def test_enter_mode_can_be_configured(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_ENTER_MODE": "ctrl_enter"}, clear=True): - config = load_config() - - self.assertEqual(config.enter_mode, "ctrl_enter") - - def test_enter_mode_supports_none(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_ENTER_MODE": "none"}, clear=True): - config = load_config() - - self.assertEqual(config.enter_mode, "none") - - def test_invalid_enter_mode_is_rejected(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_ENTER_MODE": "meta_enter"}, clear=True): - with 
self.assertRaisesRegex( - ValueError, "VIBEMOUSE_ENTER_MODE must be one of" - ): - _ = load_config() - - def test_invalid_gesture_trigger_button_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_GESTURE_TRIGGER_BUTTON": "middle"}, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_GESTURE_TRIGGER_BUTTON must be one of", - ): - _ = load_config() - - def test_gesture_trigger_button_supports_right(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_GESTURE_TRIGGER_BUTTON": "right"}, - clear=True, - ): - config = load_config() - - self.assertEqual(config.gesture_trigger_button, "right") - - def test_gesture_action_supports_workspace_switches(self) -> None: - with patch.dict( - os.environ, - { - "VIBEMOUSE_GESTURE_LEFT_ACTION": "workspace_left", - "VIBEMOUSE_GESTURE_RIGHT_ACTION": "workspace_right", - }, - clear=True, - ): - config = load_config() - - self.assertEqual(config.gesture_left_action, "workspace_left") - self.assertEqual(config.gesture_right_action, "workspace_right") - - def test_invalid_gesture_action_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_GESTURE_UP_ACTION": "paste_now"}, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_GESTURE_UP_ACTION must be one of", - ): - _ = load_config() - - def test_negative_debounce_is_rejected(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_BUTTON_DEBOUNCE_MS": "-1"}, clear=True): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_BUTTON_DEBOUNCE_MS must be a non-negative integer", - ): - _ = load_config() - - def test_invalid_integer_reports_variable_name(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_SAMPLE_RATE": "abc"}, clear=True): - with self.assertRaisesRegex( - ValueError, "VIBEMOUSE_SAMPLE_RATE must be an integer" - ): - _ = load_config() - - def test_non_positive_integer_is_rejected(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_MERGE_LENGTH_S": "0"}, 
clear=True): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_MERGE_LENGTH_S must be a positive integer", - ): - _ = load_config() - - def test_non_positive_gesture_threshold_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_GESTURE_THRESHOLD_PX": "0"}, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_GESTURE_THRESHOLD_PX must be a positive integer", - ): - _ = load_config() - - def test_invalid_button_value_is_rejected(self) -> None: - with patch.dict(os.environ, {"VIBEMOUSE_FRONT_BUTTON": "x3"}, clear=True): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_FRONT_BUTTON must be either 'x1' or 'x2'", - ): - _ = load_config() - - def test_openclaw_fields_can_be_configured(self) -> None: - with patch.dict( - os.environ, - { - "VIBEMOUSE_OPENCLAW_COMMAND": "openclaw --profile prod", - "VIBEMOUSE_OPENCLAW_AGENT": "ops-bot", - "VIBEMOUSE_OPENCLAW_TIMEOUT_S": "7.5", - "VIBEMOUSE_OPENCLAW_RETRIES": "2", - }, - clear=True, - ): - config = load_config() - - self.assertEqual(config.openclaw_command, "openclaw --profile prod") - self.assertEqual(config.openclaw_agent, "ops-bot") - self.assertEqual(config.openclaw_timeout_s, 7.5) - self.assertEqual(config.openclaw_retries, 2) - - def test_empty_openclaw_command_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_OPENCLAW_COMMAND": " "}, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_OPENCLAW_COMMAND must not be empty", - ): - _ = load_config() - - def test_non_positive_openclaw_timeout_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_OPENCLAW_TIMEOUT_S": "0"}, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_OPENCLAW_TIMEOUT_S must be a positive float", - ): - _ = load_config() - - def test_negative_openclaw_retries_is_rejected(self) -> None: - with patch.dict( - os.environ, - {"VIBEMOUSE_OPENCLAW_RETRIES": "-1"}, - clear=True, - ): - with 
self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_OPENCLAW_RETRIES must be a non-negative integer", - ): - _ = load_config() - - def test_same_front_and_rear_buttons_are_rejected(self) -> None: - with patch.dict( - os.environ, - { - "VIBEMOUSE_FRONT_BUTTON": "x1", - "VIBEMOUSE_REAR_BUTTON": "x1", - }, - clear=True, - ): - with self.assertRaisesRegex( - ValueError, - "VIBEMOUSE_FRONT_BUTTON and VIBEMOUSE_REAR_BUTTON must differ", - ): - _ = load_config() diff --git a/tests/test_deploy.py b/tests/test_deploy.py deleted file mode 100644 index c868aaa..0000000 --- a/tests/test_deploy.py +++ /dev/null @@ -1,115 +0,0 @@ -from __future__ import annotations - -import argparse -import tempfile -import unittest -from pathlib import Path -from unittest.mock import patch - -from vibemouse.deploy import ( - build_deploy_env, - render_env_file, - render_service_file, - run_deploy, -) - - -class DeployHelpersTests(unittest.TestCase): - def test_build_deploy_env_applies_preset_and_override(self) -> None: - env_map = build_deploy_env( - preset="fast", - openclaw_command="openclaw --profile prod", - openclaw_agent="ops", - openclaw_retries=5, - ) - - self.assertEqual( - env_map["VIBEMOUSE_OPENCLAW_COMMAND"], "openclaw --profile prod" - ) - self.assertEqual(env_map["VIBEMOUSE_OPENCLAW_AGENT"], "ops") - self.assertEqual(env_map["VIBEMOUSE_OPENCLAW_RETRIES"], "5") - self.assertEqual(env_map["VIBEMOUSE_BUTTON_DEBOUNCE_MS"], "120") - - def test_render_env_file_quotes_values(self) -> None: - content = render_env_file( - { - "VIBEMOUSE_OPENCLAW_COMMAND": "openclaw --profile prod", - "VIBEMOUSE_OPENCLAW_AGENT": "main", - } - ) - - self.assertIn('VIBEMOUSE_OPENCLAW_COMMAND="openclaw --profile prod"', content) - self.assertIn('VIBEMOUSE_OPENCLAW_AGENT="main"', content) - - def test_render_service_file_contains_paths(self) -> None: - env_file = Path("/tmp/vibemouse.env") - service = render_service_file( - env_file=env_file, exec_start="/tmp/vibemouse run" - ) - - 
self.assertIn("EnvironmentFile=/tmp/vibemouse.env", service) - self.assertIn("ExecStart=/tmp/vibemouse run", service) - - -class DeployCommandTests(unittest.TestCase): - def test_run_deploy_dry_run_does_not_write_files(self) -> None: - with tempfile.TemporaryDirectory(prefix="vibemouse-deploy-") as tmp: - env_file = Path(tmp) / "deploy.env" - service_file = Path(tmp) / "vibemouse.service" - args = argparse.Namespace( - preset="stable", - env_file=str(env_file), - service_file=str(service_file), - openclaw_command="openclaw", - openclaw_agent="main", - openclaw_retries=None, - exec_start="/tmp/vibemouse run", - skip_systemctl=True, - dry_run=True, - ) - - rc = run_deploy(args) - - self.assertEqual(rc, 0) - self.assertFalse(env_file.exists()) - self.assertFalse(service_file.exists()) - - def test_run_deploy_skip_systemctl_writes_files_and_runs_doctor(self) -> None: - with tempfile.TemporaryDirectory(prefix="vibemouse-deploy-") as tmp: - env_file = Path(tmp) / "deploy.env" - service_file = Path(tmp) / "vibemouse.service" - args = argparse.Namespace( - preset="stable", - env_file=str(env_file), - service_file=str(service_file), - openclaw_command="openclaw --profile prod", - openclaw_agent="ops", - openclaw_retries=2, - exec_start="/tmp/vibemouse run", - skip_systemctl=True, - dry_run=False, - ) - - with patch("vibemouse.deploy.run_doctor", return_value=0) as run_doctor: - rc = run_deploy(args) - - self.assertEqual(rc, 0) - self.assertEqual(run_doctor.call_count, 1) - self.assertTrue(env_file.exists()) - self.assertTrue(service_file.exists()) - self.assertIn('VIBEMOUSE_OPENCLAW_AGENT="ops"', env_file.read_text()) - - def test_run_deploy_rejects_negative_retry_override(self) -> None: - args = argparse.Namespace( - preset="stable", - env_file="/tmp/deploy.env", - service_file="/tmp/vibemouse.service", - openclaw_command="openclaw", - openclaw_agent="main", - openclaw_retries=-1, - exec_start="/tmp/vibemouse run", - skip_systemctl=True, - dry_run=True, - ) - rc = 
run_deploy(args) - self.assertEqual(rc, 1) diff --git a/tests/test_doctor.py b/tests/test_doctor.py deleted file mode 100644 index d2476b3..0000000 --- a/tests/test_doctor.py +++ /dev/null @@ -1,260 +0,0 @@ -from __future__ import annotations - -import tempfile -import unittest -from pathlib import Path -from types import SimpleNamespace -from typing import cast -from unittest.mock import patch - -from vibemouse.config import AppConfig -from vibemouse.doctor import ( - DoctorCheck, - _apply_doctor_fixes, - _ensure_user_service_active, - _fix_hyprland_return_bind_conflict, - _check_hyprland_return_bind_conflict, - _check_openclaw, - _parse_openclaw_command, - run_doctor, -) - - -class DoctorHelpersTests(unittest.TestCase): - def test_parse_openclaw_command_invalid_shell_syntax(self) -> None: - self.assertIsNone(_parse_openclaw_command('openclaw "')) - - def test_check_openclaw_reports_missing_executable(self) -> None: - config = cast( - AppConfig, - cast( - object, - SimpleNamespace(openclaw_command="openclaw", openclaw_agent="main"), - ), - ) - with patch("vibemouse.doctor.shutil.which", return_value=None): - checks = _check_openclaw(config) - - self.assertEqual(checks[0].status, "fail") - self.assertIn("executable not found", checks[0].detail) - - def test_check_openclaw_reports_agent_exists(self) -> None: - config = cast( - AppConfig, - cast( - object, - SimpleNamespace(openclaw_command="openclaw", openclaw_agent="main"), - ), - ) - with ( - patch("vibemouse.doctor.shutil.which", return_value="/usr/bin/openclaw"), - patch( - "vibemouse.doctor.subprocess.run", - return_value=SimpleNamespace( - returncode=0, - stdout='[{"id": "main"}]', - stderr="", - ), - ), - ): - checks = _check_openclaw(config) - - self.assertEqual([check.status for check in checks], ["ok", "ok"]) - - def test_hyprland_bind_conflict_detection(self) -> None: - with tempfile.TemporaryDirectory(prefix="vibemouse-doctor-") as tmp: - bind_path = ( - Path(tmp) / ".config" / "hypr" / "UserConfigs" / 
"UserKeybinds.conf" - ) - bind_path.parent.mkdir(parents=True, exist_ok=True) - _ = bind_path.write_text( - "bind = , mouse:275, sendshortcut, , Return, activewindow\n", - encoding="utf-8", - ) - - with patch("vibemouse.doctor.Path.home", return_value=Path(tmp)): - check = _check_hyprland_return_bind_conflict( - cast( - AppConfig, - cast(object, SimpleNamespace(rear_button="x1")), - ) - ) - - self.assertEqual(check.status, "fail") - self.assertIn("conflicting return bind", check.detail) - - def test_audio_input_check_reports_missing_dependency(self) -> None: - with patch( - "vibemouse.doctor.importlib.import_module", - side_effect=ModuleNotFoundError("sounddevice"), - ): - from vibemouse.doctor import _check_audio_input - - check = _check_audio_input(None) - - self.assertEqual(check.status, "fail") - self.assertIn("cannot import sounddevice", check.detail) - - def test_audio_input_check_reports_ok_when_input_device_exists(self) -> None: - fake_sounddevice = SimpleNamespace( - query_devices=lambda: [{"max_input_channels": 2}], - default=SimpleNamespace(device=(0, 1)), - check_input_settings=lambda **kwargs: kwargs, - ) - with patch( - "vibemouse.doctor.importlib.import_module", - return_value=fake_sounddevice, - ): - from vibemouse.doctor import _check_audio_input - - check = _check_audio_input( - cast( - AppConfig, - cast(object, SimpleNamespace(sample_rate=16000, channels=1)), - ) - ) - - self.assertEqual(check.status, "ok") - - def test_input_permission_check_fails_when_all_devices_denied(self) -> None: - fake_evdev = SimpleNamespace( - list_devices=lambda: ["/dev/input/event0"], - InputDevice=lambda path: (_ for _ in ()).throw(PermissionError(path)), - ecodes=SimpleNamespace(EV_KEY=1, BTN_SIDE=0x116, BTN_EXTRA=0x117), - ) - with ( - patch("vibemouse.doctor.sys.platform", "linux"), - patch("vibemouse.doctor.importlib.import_module", return_value=fake_evdev), - ): - from vibemouse.doctor import _check_input_device_permissions - - check = 
_check_input_device_permissions( - cast(AppConfig, cast(object, SimpleNamespace(rear_button="x1"))) - ) - - self.assertEqual(check.status, "fail") - self.assertIn("permission denied", check.detail) - - def test_fix_hyprland_return_bind_conflict_comments_conflicting_lines(self) -> None: - with tempfile.TemporaryDirectory(prefix="vibemouse-doctor-fix-") as tmp: - bind_path = ( - Path(tmp) / ".config" / "hypr" / "UserConfigs" / "UserKeybinds.conf" - ) - bind_path.parent.mkdir(parents=True, exist_ok=True) - _ = bind_path.write_text( - "bind = , mouse:275, sendshortcut, , Return, activewindow\n" - "bind = , mouse:276, sendshortcut, , Return, activewindow\n", - encoding="utf-8", - ) - - with ( - patch("vibemouse.doctor.Path.home", return_value=Path(tmp)), - patch("vibemouse.doctor._run_subprocess") as run_subprocess, - ): - _fix_hyprland_return_bind_conflict() - - content = bind_path.read_text(encoding="utf-8") - self.assertIn("auto-disabled by vibemouse doctor --fix", content) - self.assertEqual(run_subprocess.call_count, 1) - - def test_ensure_user_service_active_restarts_when_inactive(self) -> None: - calls: list[list[str]] = [] - - def fake_run(cmd: list[str], *, timeout: float) -> SimpleNamespace: - _ = timeout - calls.append(cmd) - if cmd[-2:] == ["is-active", "vibemouse.service"]: - return SimpleNamespace(returncode=3, stdout="inactive\n") - return SimpleNamespace(returncode=0, stdout="") - - with patch("vibemouse.doctor._run_subprocess", side_effect=fake_run): - _ensure_user_service_active() - - self.assertEqual( - calls, - [ - ["systemctl", "--user", "is-active", "vibemouse.service"], - ["systemctl", "--user", "restart", "vibemouse.service"], - ], - ) - - def test_apply_doctor_fixes_runs_both_fixers(self) -> None: - with ( - patch("vibemouse.doctor._fix_hyprland_return_bind_conflict") as fix_bind, - patch("vibemouse.doctor._ensure_user_service_active") as fix_service, - ): - _apply_doctor_fixes() - - self.assertEqual(fix_bind.call_count, 1) - 
self.assertEqual(fix_service.call_count, 1) - - -class DoctorCommandTests(unittest.TestCase): - def test_run_doctor_returns_nonzero_when_fail_exists(self) -> None: - with ( - patch( - "vibemouse.doctor._check_config_load", - return_value=( - DoctorCheck("config", "fail", "broken"), - None, - ), - ), - patch( - "vibemouse.doctor._check_hyprland_return_bind_conflict" - ) as bind_check, - patch("vibemouse.doctor._check_audio_input") as audio_check, - patch("vibemouse.doctor._check_input_device_permissions") as input_check, - patch("vibemouse.doctor._check_user_service_state") as service_check, - ): - bind_check.return_value = DoctorCheck("bind", "ok", "ok") - audio_check.return_value = DoctorCheck("audio", "ok", "ok") - input_check.return_value = DoctorCheck("input", "ok", "ok") - service_check.return_value = DoctorCheck("service", "ok", "ok") - rc = run_doctor() - - self.assertEqual(rc, 1) - - def test_run_doctor_with_fix_invokes_fix_path(self) -> None: - with ( - patch("vibemouse.doctor._apply_doctor_fixes") as apply_fixes, - patch( - "vibemouse.doctor._check_config_load", - return_value=( - DoctorCheck("config", "ok", "ok"), - cast( - AppConfig, - cast( - object, - SimpleNamespace( - openclaw_command="openclaw", - openclaw_agent="main", - rear_button="x2", - sample_rate=16000, - channels=1, - ), - ), - ), - ), - ), - patch("vibemouse.doctor._check_openclaw", return_value=[]), - patch( - "vibemouse.doctor._check_audio_input", - return_value=DoctorCheck("audio", "ok", "ok"), - ), - patch( - "vibemouse.doctor._check_input_device_permissions", - return_value=DoctorCheck("input", "ok", "ok"), - ), - patch( - "vibemouse.doctor._check_hyprland_return_bind_conflict", - return_value=DoctorCheck("bind", "ok", "ok"), - ), - patch( - "vibemouse.doctor._check_user_service_state", - return_value=DoctorCheck("service", "ok", "ok"), - ), - ): - rc = run_doctor(apply_fixes=True) - - self.assertEqual(rc, 0) - self.assertEqual(apply_fixes.call_count, 1) diff --git 
a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index 7a31ef0..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -import unittest -from types import SimpleNamespace -from unittest.mock import MagicMock, patch - -from vibemouse.main import main - - -class MainEntryTests(unittest.TestCase): - def test_doctor_subcommand_dispatches_to_doctor(self) -> None: - with ( - patch("vibemouse.main.run_doctor", return_value=7) as run_doctor, - patch("vibemouse.main.load_config") as load_config, - ): - rc = main(["doctor"]) - - self.assertEqual(rc, 7) - self.assertEqual(run_doctor.call_count, 1) - self.assertEqual(run_doctor.call_args.kwargs, {"apply_fixes": False}) - self.assertEqual(load_config.call_count, 0) - - def test_doctor_fix_flag_is_forwarded(self) -> None: - with patch("vibemouse.main.run_doctor", return_value=0) as run_doctor: - rc = main(["doctor", "--fix"]) - - self.assertEqual(rc, 0) - self.assertEqual(run_doctor.call_count, 1) - self.assertEqual(run_doctor.call_args.kwargs, {"apply_fixes": True}) - - def test_default_invocation_runs_app(self) -> None: - app_instance = MagicMock() - cfg = SimpleNamespace() - with ( - patch("vibemouse.main.load_config", return_value=cfg) as load_config, - patch( - "vibemouse.main.VoiceMouseApp", return_value=app_instance - ) as app_ctor, - ): - rc = main([]) - - self.assertEqual(rc, 0) - self.assertEqual(load_config.call_count, 1) - self.assertEqual(app_ctor.call_count, 1) - self.assertEqual(app_instance.run.call_count, 1) - - def test_explicit_run_subcommand_runs_app(self) -> None: - app_instance = MagicMock() - cfg = SimpleNamespace() - with ( - patch("vibemouse.main.load_config", return_value=cfg), - patch("vibemouse.main.VoiceMouseApp", return_value=app_instance), - ): - rc = main(["run"]) - - self.assertEqual(rc, 0) - self.assertEqual(app_instance.run.call_count, 1) - - def test_deploy_subcommand_dispatches_to_deploy(self) -> None: - with ( - 
patch("vibemouse.main.run_deploy", return_value=5) as run_deploy, - patch("vibemouse.main.load_config") as load_config, - ): - rc = main(["deploy", "--dry-run"]) - - self.assertEqual(rc, 5) - self.assertEqual(run_deploy.call_count, 1) - self.assertEqual(load_config.call_count, 0) diff --git a/tests/test_mouse_listener.py b/tests/test_mouse_listener.py deleted file mode 100644 index 6d3cb45..0000000 --- a/tests/test_mouse_listener.py +++ /dev/null @@ -1,153 +0,0 @@ -from __future__ import annotations - -import unittest -from collections.abc import Callable -from typing import cast -from unittest.mock import patch - -from vibemouse.mouse_listener import SideButtonListener - - -def _noop_button() -> None: - return - - -class SideButtonListenerGestureTests(unittest.TestCase): - @staticmethod - def _classify(dx: int, dy: int, threshold_px: int) -> str | None: - classify = cast( - Callable[[int, int, int], str | None], - getattr(SideButtonListener, "_classify_gesture"), - ) - return classify(dx, dy, threshold_px) - - def test_classify_returns_none_when_movement_is_small(self) -> None: - self.assertIsNone(self._classify(20, 10, 120)) - - def test_classify_returns_right_for_positive_dx(self) -> None: - self.assertEqual(self._classify(200, 30, 120), "right") - - def test_classify_returns_left_for_negative_dx(self) -> None: - self.assertEqual(self._classify(-220, 10, 120), "left") - - def test_classify_returns_up_for_negative_dy(self) -> None: - self.assertEqual(self._classify(20, -250, 120), "up") - - def test_classify_returns_down_for_positive_dy(self) -> None: - self.assertEqual(self._classify(30, 240, 120), "down") - - def test_constructor_rejects_invalid_trigger_button(self) -> None: - with self.assertRaisesRegex( - ValueError, - "gesture_trigger_button must be one of: front, rear, right", - ): - _ = SideButtonListener( - on_front_press=_noop_button, - on_rear_press=_noop_button, - front_button="x1", - rear_button="x2", - gesture_trigger_button="middle", - ) - - def 
test_constructor_accepts_right_trigger_button(self) -> None: - listener = SideButtonListener( - on_front_press=_noop_button, - on_rear_press=_noop_button, - front_button="x1", - rear_button="x2", - gesture_trigger_button="right", - ) - - self.assertIsNotNone(listener) - - def test_dispatch_gesture_calls_callback_when_present(self) -> None: - seen: list[str] = [] - - def on_gesture(direction: str) -> None: - seen.append(direction) - - listener = SideButtonListener( - on_front_press=_noop_button, - on_rear_press=_noop_button, - front_button="x1", - rear_button="x2", - on_gesture=on_gesture, - ) - - dispatch_gesture = cast( - Callable[[str], None], - getattr(listener, "_dispatch_gesture"), - ) - dispatch_gesture("up") - self.assertEqual(seen, ["up"]) - - def test_finish_gesture_restores_cursor_after_direction_action(self) -> None: - seen: list[str] = [] - restored: list[tuple[int, int]] = [] - - def on_gesture(direction: str) -> None: - seen.append(direction) - - listener = SideButtonListener( - on_front_press=_noop_button, - on_rear_press=_noop_button, - front_button="x1", - rear_button="x2", - on_gesture=on_gesture, - gestures_enabled=True, - gesture_trigger_button="right", - ) - - with patch.object(listener, "_read_cursor_position", return_value=(100, 200)): - start_capture = cast( - Callable[..., None], - getattr(listener, "_start_gesture_capture"), - ) - start_capture(initial_position=(0, 0)) - - accumulate = cast( - Callable[..., None], - getattr(listener, "_accumulate_gesture_delta"), - ) - accumulate(dx=300, dy=0) - - def capture_restore(position: tuple[int, int]) -> None: - restored.append(position) - - with patch.object( - listener, "_restore_cursor_position", side_effect=capture_restore - ): - finish_capture = cast( - Callable[[str], None], - getattr(listener, "_finish_gesture_capture"), - ) - finish_capture("right") - - self.assertEqual(seen, ["right"]) - self.assertEqual(restored, [(100, 200)]) - - def 
test_finish_small_movement_does_not_restore_cursor(self) -> None: - listener = SideButtonListener( - on_front_press=_noop_button, - on_rear_press=_noop_button, - front_button="x1", - rear_button="x2", - gestures_enabled=True, - gesture_trigger_button="right", - ) - - with patch.object(listener, "_read_cursor_position", return_value=(50, 60)): - start_capture = cast( - Callable[..., None], - getattr(listener, "_start_gesture_capture"), - ) - start_capture(initial_position=(0, 0)) - - with patch.object(listener, "_restore_cursor_position") as restore_mock: - finish_capture = cast( - Callable[[str], None], - getattr(listener, "_finish_gesture_capture"), - ) - finish_capture("right") - - self.assertEqual(restore_mock.call_count, 0) diff --git a/tests/test_output.py b/tests/test_output.py deleted file mode 100644 index 390d6b4..0000000 --- a/tests/test_output.py +++ /dev/null @@ -1,723 +0,0 @@ -from __future__ import annotations - -import subprocess -import unittest -from types import SimpleNamespace -from collections.abc import Callable -from typing import cast -from unittest.mock import patch - -from vibemouse.output import TextOutput - - -class _FakeKeyboardController: - def __init__(self, *, fail_on_press: bool = False) -> None: - self.events: list[tuple[str, object]] = [] - self._fail_on_press: bool = fail_on_press - - def press(self, key: object) -> None: - if self._fail_on_press: - raise RuntimeError("press failed") - self.events.append(("press", key)) - - def release(self, key: object) -> None: - self.events.append(("release", key)) - - def type(self, text: str) -> None: - self.events.append(("type", text)) - - -class TextOutputFocusProbeTests(unittest.TestCase): - @staticmethod - def _make_subject() -> TextOutput: - return object.__new__(TextOutput) - - def test_focus_probe_uses_timeout_and_accepts_positive_result(self) -> None: - captured_timeouts: list[float] = [] - - def fake_run(*args: object, **kwargs: object) -> SimpleNamespace: - _ = args - timeout = 
kwargs.get("timeout") - if isinstance(timeout, float): - captured_timeouts.append(timeout) - return SimpleNamespace(returncode=0, stdout="1\n") - - with patch("vibemouse.system_integration.subprocess.run", side_effect=fake_run): - subject = self._make_subject() - probe = cast(Callable[[], bool], getattr(subject, "_is_text_input_focused")) - call_probe: Callable[[], bool] = probe - result = call_probe() - - self.assertTrue(result) - self.assertEqual(captured_timeouts, [1.5]) - - @patch( - "vibemouse.system_integration.subprocess.run", - side_effect=subprocess.TimeoutExpired( - cmd=["python3", "-c", "..."], timeout=1.5 - ), - ) - def test_focus_probe_timeout_returns_false(self, _mock_run: object) -> None: - subject = self._make_subject() - probe = cast(Callable[[], bool], getattr(subject, "_is_text_input_focused")) - self.assertFalse(probe()) - - @patch( - "vibemouse.system_integration.subprocess.run", - side_effect=OSError("spawn failed"), - ) - def test_focus_probe_oserror_returns_false(self, _mock_run: object) -> None: - subject = self._make_subject() - probe = cast(Callable[[], bool], getattr(subject, "_is_text_input_focused")) - self.assertFalse(probe()) - - def test_focus_probe_prefers_system_integration_result(self) -> None: - subject = self._make_subject() - setattr( - subject, - "_system_integration", - SimpleNamespace(is_text_input_focused=lambda: True), - ) - - with patch( - "vibemouse.output.probe_text_input_focus_via_atspi" - ) as fallback_probe: - probe = cast(Callable[[], bool], getattr(subject, "_is_text_input_focused")) - self.assertTrue(probe()) - - self.assertEqual(fallback_probe.call_count, 0) - - def test_focus_probe_falls_back_when_integration_returns_none(self) -> None: - subject = self._make_subject() - setattr( - subject, - "_system_integration", - SimpleNamespace(is_text_input_focused=lambda: None), - ) - - with patch( - "vibemouse.output.probe_text_input_focus_via_atspi", - return_value=True, - ) as fallback_probe: - probe = 
cast(Callable[[], bool], getattr(subject, "_is_text_input_focused")) - self.assertTrue(probe()) - - self.assertEqual(fallback_probe.call_count, 1) - - -class TextOutputRoutingTests(unittest.TestCase): - @staticmethod - def _make_subject() -> TextOutput: - return object.__new__(TextOutput) - - @staticmethod - def _bind_keyboard(subject: TextOutput, keyboard: _FakeKeyboardController) -> None: - setattr(subject, "_kb", keyboard) - setattr(subject, "_ctrl_key", "CTRL") - setattr(subject, "_shift_key", "SHIFT") - setattr(subject, "_enter_key", "ENTER") - setattr(subject, "_atspi", None) - setattr(subject, "_hyprland_session", False) - setattr(subject, "_openclaw_command", "openclaw") - setattr(subject, "_openclaw_agent", None) - setattr(subject, "_openclaw_timeout_s", 20.0) - setattr(subject, "_openclaw_retries", 0) - setattr( - subject, - "_system_integration", - SimpleNamespace( - send_shortcut=lambda mod, key: False, - is_terminal_window_active=lambda: None, - paste_shortcuts=lambda terminal_active: (), - send_enter_via_accessibility=lambda: None, - is_text_input_focused=lambda: None, - ), - ) - - def test_send_to_openclaw_success_returns_openclaw(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - with patch( - "vibemouse.output.subprocess.Popen", - return_value=SimpleNamespace(), - ) as popen_mock: - route = subject.send_to_openclaw("hello") - detail = subject.send_to_openclaw_result("hello") - - self.assertEqual(route, "openclaw") - self.assertEqual( - popen_mock.call_args.args[0], - ["openclaw", "agent", "--message", "hello", "--json"], - ) - self.assertEqual(detail.route, "openclaw") - self.assertEqual(detail.reason, "dispatched") - - def test_send_to_openclaw_includes_agent_when_configured(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_openclaw_agent", "ops") - - with patch( - 
"vibemouse.output.subprocess.Popen", - return_value=SimpleNamespace(), - ) as popen_mock: - route = subject.send_to_openclaw("hello") - - self.assertEqual(route, "openclaw") - self.assertEqual( - popen_mock.call_args.args[0], - ["openclaw", "agent", "--message", "hello", "--json", "--agent", "ops"], - ) - - def test_send_to_openclaw_invalid_command_falls_back_to_clipboard(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_openclaw_command", 'openclaw "') - - with ( - patch("vibemouse.output.pyperclip.copy") as copy_mock, - ): - route = subject.send_to_openclaw("hello") - - self.assertEqual(route, "clipboard") - self.assertEqual(copy_mock.call_count, 1) - - detail = subject.send_to_openclaw_result("hello") - self.assertEqual(detail.route, "clipboard") - self.assertEqual(detail.reason, "invalid_command") - - def test_send_to_openclaw_spawn_error_falls_back_to_clipboard(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - with ( - patch( - "vibemouse.output.subprocess.Popen", - side_effect=OSError("openclaw missing"), - ), - patch("vibemouse.output.pyperclip.copy") as copy_mock, - ): - route = subject.send_to_openclaw("hello") - - self.assertEqual(route, "clipboard") - self.assertEqual(copy_mock.call_count, 1) - - def test_send_to_openclaw_retries_once_then_succeeds(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_openclaw_retries", 1) - - popen_side_effects = [ - OSError("temporary spawn failure"), - SimpleNamespace(), - ] - with patch( - "vibemouse.output.subprocess.Popen", - side_effect=popen_side_effects, - ) as popen_mock: - detail = subject.send_to_openclaw_result("hello") - - self.assertEqual(detail.route, "openclaw") - self.assertEqual(detail.reason, "dispatched_after_retry_1") - 
self.assertEqual(popen_mock.call_count, 2) - - def test_send_to_openclaw_retries_exhausted_falls_back_to_clipboard(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_openclaw_retries", 1) - - with ( - patch( - "vibemouse.output.subprocess.Popen", - side_effect=OSError("spawn failure"), - ), - patch("vibemouse.output.pyperclip.copy") as copy_mock, - ): - detail = subject.send_to_openclaw_result("hello") - - self.assertEqual(detail.route, "clipboard") - self.assertEqual(detail.reason, "spawn_error:OSError") - self.assertEqual(copy_mock.call_count, 1) - - @staticmethod - def _not_focused() -> bool: - return False - - def test_clipboard_route_without_auto_paste(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - - copied: list[str] = [] - - def fake_copy(text: str) -> None: - copied.append(text) - - with patch("vibemouse.output.pyperclip.copy", side_effect=fake_copy): - route = subject.inject_or_clipboard(" hello ", auto_paste=False) - - self.assertEqual(route, "clipboard") - self.assertEqual(copied, ["hello"]) - self.assertEqual(keyboard.events, []) - - def test_auto_paste_route_uses_ctrl_v(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - - with patch("vibemouse.output.pyperclip.copy") as copy_mock: - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual( - keyboard.events, - [ - ("press", "CTRL"), - ("press", "v"), - ("release", "v"), - ("release", "CTRL"), - ], - ) - - def test_auto_paste_uses_system_shortcut_candidates_when_available(self) -> None: - subject = self._make_subject() 
- keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - setattr( - subject, - "_system_integration", - SimpleNamespace( - send_shortcut=lambda mod, key: mod == "ALT" and key == "V", - is_terminal_window_active=lambda: True, - paste_shortcuts=lambda terminal_active: (("ALT", "V"),) - if terminal_active - else (), - send_enter_via_accessibility=lambda: None, - is_text_input_focused=lambda: None, - ), - ) - - with ( - patch("vibemouse.output.pyperclip.copy") as copy_mock, - patch("vibemouse.output.subprocess.run") as run_mock, - ): - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual(run_mock.call_count, 0) - self.assertEqual(keyboard.events, []) - - def test_auto_paste_system_shortcuts_fall_back_to_keyboard(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - setattr( - subject, - "_system_integration", - SimpleNamespace( - send_shortcut=lambda mod, key: False, - is_terminal_window_active=lambda: True, - paste_shortcuts=lambda terminal_active: (("ALT", "V"),) - if terminal_active - else (), - send_enter_via_accessibility=lambda: None, - is_text_input_focused=lambda: None, - ), - ) - - with patch("vibemouse.output.pyperclip.copy") as copy_mock: - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual( - keyboard.events, - [ - ("press", "CTRL"), - ("press", "v"), - ("release", "v"), - ("release", "CTRL"), - ], - ) - - def test_auto_paste_prefers_hyprland_sendshortcut(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, 
"_is_text_input_focused", self._not_focused) - setattr(subject, "_hyprland_session", True) - - with ( - patch("vibemouse.output.pyperclip.copy") as copy_mock, - patch.object( - subject, - "_is_hyprland_terminal_active", - return_value=False, - ), - patch( - "vibemouse.output.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock, - ): - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual(run_mock.call_count, 1) - self.assertEqual(keyboard.events, []) - - def test_auto_paste_hyprland_failure_falls_back_to_ctrl_v(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - setattr(subject, "_hyprland_session", True) - - with ( - patch("vibemouse.output.pyperclip.copy") as copy_mock, - patch.object( - subject, - "_is_hyprland_terminal_active", - return_value=False, - ), - patch( - "vibemouse.output.subprocess.run", - return_value=SimpleNamespace(returncode=1, stdout=""), - ) as run_mock, - ): - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual(run_mock.call_count, 1) - self.assertEqual( - keyboard.events, - [ - ("press", "CTRL"), - ("press", "v"), - ("release", "v"), - ("release", "CTRL"), - ], - ) - - def test_auto_paste_hyprland_terminal_prefers_ctrl_shift_v(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - setattr(subject, "_hyprland_session", True) - - with ( - patch("vibemouse.output.pyperclip.copy") as copy_mock, - patch.object( - subject, - "_is_hyprland_terminal_active", - return_value=True, - ), - patch( - 
"vibemouse.output.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock, - ): - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual(run_mock.call_count, 1) - run_args = cast(list[str], run_mock.call_args.args[0]) - self.assertEqual( - run_args, - ["hyprctl", "dispatch", "sendshortcut", "CTRL SHIFT, V, activewindow"], - ) - self.assertEqual(keyboard.events, []) - - def test_auto_paste_hyprland_terminal_uses_shift_insert_fallback(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - setattr(subject, "_hyprland_session", True) - - with ( - patch("vibemouse.output.pyperclip.copy") as copy_mock, - patch.object( - subject, - "_is_hyprland_terminal_active", - return_value=True, - ), - patch( - "vibemouse.output.subprocess.run", - side_effect=[ - SimpleNamespace(returncode=1, stdout=""), - SimpleNamespace(returncode=0, stdout="ok\n"), - ], - ) as run_mock, - ): - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "pasted") - self.assertEqual(copy_mock.call_count, 1) - self.assertEqual(run_mock.call_count, 2) - first_args = cast(list[str], run_mock.call_args_list[0].args[0]) - second_args = cast(list[str], run_mock.call_args_list[1].args[0]) - self.assertEqual( - first_args, - ["hyprctl", "dispatch", "sendshortcut", "CTRL SHIFT, V, activewindow"], - ) - self.assertEqual( - second_args, - ["hyprctl", "dispatch", "sendshortcut", "SHIFT, Insert, activewindow"], - ) - self.assertEqual(keyboard.events, []) - - def test_hyprland_terminal_detection_by_window_class(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_hyprland_session", True) - - with patch( - 
"vibemouse.output.subprocess.run", - return_value=SimpleNamespace( - returncode=0, - stdout='{"class":"foot","initialClass":"foot","title":"OpenCode"}', - ), - ): - probe = cast( - Callable[[], bool], - getattr(subject, "_is_hyprland_terminal_active"), - ) - self.assertTrue(probe()) - - def test_hyprland_terminal_detection_by_title_hint(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_hyprland_session", True) - - with patch( - "vibemouse.output.subprocess.run", - return_value=SimpleNamespace( - returncode=0, - stdout='{"class":"Code","initialClass":"Code","title":"tmux"}', - ), - ): - probe = cast( - Callable[[], bool], - getattr(subject, "_is_hyprland_terminal_active"), - ) - self.assertTrue(probe()) - - def test_hyprland_terminal_detection_false_for_non_terminal_window(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_hyprland_session", True) - - with patch( - "vibemouse.output.subprocess.run", - return_value=SimpleNamespace( - returncode=0, - stdout='{"class":"chromium","initialClass":"chromium","title":"ChatGPT"}', - ), - ): - probe = cast( - Callable[[], bool], - getattr(subject, "_is_hyprland_terminal_active"), - ) - self.assertFalse(probe()) - - def test_terminal_detection_prefers_system_integration(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr( - subject, - "_system_integration", - SimpleNamespace( - send_shortcut=lambda mod, key: False, - is_terminal_window_active=lambda: True, - paste_shortcuts=lambda terminal_active: (), - send_enter_via_accessibility=lambda: None, - is_text_input_focused=lambda: None, - ), - ) - - with patch("vibemouse.output.subprocess.run") as run_mock: - probe = cast( - Callable[[], bool], - getattr(subject, "_is_hyprland_terminal_active"), - ) - 
self.assertTrue(probe()) - - self.assertEqual(run_mock.call_count, 0) - - def test_auto_paste_failure_falls_back_to_clipboard(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_is_text_input_focused", self._not_focused) - - def fail_paste() -> None: - raise RuntimeError("paste failure") - - setattr(subject, "_paste_clipboard", fail_paste) - - with patch("vibemouse.output.pyperclip.copy") as copy_mock: - route = subject.inject_or_clipboard("hello", auto_paste=True) - - self.assertEqual(route, "clipboard") - self.assertEqual(copy_mock.call_count, 1) - - def test_send_enter_uses_enter_mode(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - with patch("vibemouse.output.time.sleep"): - subject.send_enter(mode="enter") - - self.assertEqual(keyboard.events, [("press", "ENTER"), ("release", "ENTER")]) - - def test_send_enter_supports_none_mode(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - subject.send_enter(mode="none") - - self.assertEqual(keyboard.events, []) - - def test_send_enter_prefers_atspi_when_available(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - class _FakeKeySynthType: - PRESSRELEASE: object = object() - - class _FakeAtspi: - KeySynthType: type[_FakeKeySynthType] = _FakeKeySynthType - - @staticmethod - def generate_keyboard_event( - keyval: int, - keystring: str | None, - synth_type: object, - ) -> bool: - _ = keyval - _ = keystring - _ = synth_type - return True - - setattr(subject, "_atspi", _FakeAtspi()) - - subject.send_enter(mode="enter") - - self.assertEqual(keyboard.events, []) - - def test_send_enter_prefers_system_accessibility_when_available(self) -> None: - subject = self._make_subject() - keyboard = 
_FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr( - subject, - "_system_integration", - SimpleNamespace(send_enter_via_accessibility=lambda: True), - ) - - with patch("vibemouse.output.probe_send_enter_via_atspi") as probe_mock: - subject.send_enter(mode="enter") - - self.assertEqual(probe_mock.call_count, 0) - self.assertEqual(keyboard.events, []) - - def test_send_enter_falls_back_to_probe_when_integration_returns_none(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr( - subject, - "_system_integration", - SimpleNamespace(send_enter_via_accessibility=lambda: None), - ) - - with patch( - "vibemouse.output.probe_send_enter_via_atspi", return_value=True - ) as probe_mock: - subject.send_enter(mode="enter") - - self.assertEqual(probe_mock.call_count, 1) - self.assertEqual(keyboard.events, []) - - def test_send_enter_prefers_hyprland_sendshortcut_when_available(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - setattr(subject, "_hyprland_session", True) - - with patch( - "vibemouse.output.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock: - subject.send_enter(mode="enter") - - self.assertEqual(run_mock.call_count, 1) - self.assertEqual(keyboard.events, []) - - def test_send_enter_supports_ctrl_enter(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - with patch("vibemouse.output.time.sleep"): - subject.send_enter(mode="ctrl_enter") - - self.assertEqual( - keyboard.events, - [ - ("press", "CTRL"), - ("press", "ENTER"), - ("release", "ENTER"), - ("release", "CTRL"), - ], - ) - - def test_send_enter_supports_shift_enter(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - with 
patch("vibemouse.output.time.sleep"): - subject.send_enter(mode="shift_enter") - - self.assertEqual( - keyboard.events, - [ - ("press", "SHIFT"), - ("press", "ENTER"), - ("release", "ENTER"), - ("release", "SHIFT"), - ], - ) - - def test_send_enter_rejects_unknown_mode(self) -> None: - subject = self._make_subject() - keyboard = _FakeKeyboardController() - self._bind_keyboard(subject, keyboard) - - with self.assertRaisesRegex(ValueError, "Unsupported enter mode"): - subject.send_enter(mode="meta_enter") diff --git a/tests/test_system_integration.py b/tests/test_system_integration.py deleted file mode 100644 index efa0de6..0000000 --- a/tests/test_system_integration.py +++ /dev/null @@ -1,209 +0,0 @@ -from __future__ import annotations - -import subprocess -import unittest -from types import SimpleNamespace -from unittest.mock import patch - -from vibemouse.system_integration import ( - HyprlandSystemIntegration, - MacOSSystemIntegration, - NoopSystemIntegration, - WindowsSystemIntegration, - create_system_integration, - detect_hyprland_session, - is_terminal_window_payload, - probe_send_enter_via_atspi, - probe_text_input_focus_via_atspi, -) - - -class SystemIntegrationDetectionTests(unittest.TestCase): - def test_detect_hyprland_by_desktop_name(self) -> None: - env = {"XDG_CURRENT_DESKTOP": "Hyprland"} - self.assertTrue(detect_hyprland_session(env=env)) - - def test_detect_hyprland_by_instance_signature(self) -> None: - env = {"HYPRLAND_INSTANCE_SIGNATURE": "abc"} - self.assertTrue(detect_hyprland_session(env=env)) - - def test_detect_hyprland_false_when_no_markers(self) -> None: - self.assertFalse(detect_hyprland_session(env={})) - - def test_factory_returns_hyprland_integration(self) -> None: - integration = create_system_integration(env={"XDG_CURRENT_DESKTOP": "Hyprland"}) - self.assertIsInstance(integration, HyprlandSystemIntegration) - - def test_factory_returns_noop_integration(self) -> None: - integration = create_system_integration(env={}, 
platform_name="linux") - self.assertIsInstance(integration, NoopSystemIntegration) - - def test_factory_returns_windows_integration(self) -> None: - integration = create_system_integration(env={}, platform_name="win32") - self.assertIsInstance(integration, WindowsSystemIntegration) - - def test_factory_returns_macos_integration(self) -> None: - integration = create_system_integration(env={}, platform_name="darwin") - self.assertIsInstance(integration, MacOSSystemIntegration) - - -class HyprlandSystemIntegrationTests(unittest.TestCase): - def test_send_shortcut_uses_hyprctl_dispatch(self) -> None: - integration = HyprlandSystemIntegration() - with patch( - "vibemouse.system_integration.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock: - ok = integration.send_shortcut(mod="CTRL SHIFT", key="V") - - self.assertTrue(ok) - self.assertEqual( - run_mock.call_args.args[0], - ["hyprctl", "dispatch", "sendshortcut", "CTRL SHIFT, V, activewindow"], - ) - - def test_switch_workspace_left_uses_expected_argument(self) -> None: - integration = HyprlandSystemIntegration() - with patch( - "vibemouse.system_integration.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="ok\n"), - ) as run_mock: - ok = integration.switch_workspace("left") - - self.assertTrue(ok) - self.assertEqual( - run_mock.call_args.args[0], - ["hyprctl", "dispatch", "workspace", "e-1"], - ) - - def test_switch_workspace_handles_timeout(self) -> None: - integration = HyprlandSystemIntegration() - with patch( - "vibemouse.system_integration.subprocess.run", - side_effect=subprocess.TimeoutExpired(cmd=["hyprctl"], timeout=1.0), - ): - self.assertFalse(integration.switch_workspace("right")) - - def test_cursor_position_returns_tuple_from_json(self) -> None: - integration = HyprlandSystemIntegration() - with patch( - "vibemouse.system_integration.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout='{"x":120.5,"y":75}'), - ): - position = 
integration.cursor_position() - - self.assertEqual(position, (120, 75)) - - def test_noop_focus_probe_returns_none(self) -> None: - integration = NoopSystemIntegration() - self.assertIsNone(integration.is_text_input_focused()) - - def test_noop_enter_accessibility_returns_none(self) -> None: - integration = NoopSystemIntegration() - self.assertIsNone(integration.send_enter_via_accessibility()) - - def test_hyprland_enter_accessibility_delegates_to_probe(self) -> None: - integration = HyprlandSystemIntegration() - with patch( - "vibemouse.system_integration.probe_send_enter_via_atspi", - return_value=True, - ) as probe_mock: - ok = integration.send_enter_via_accessibility() - - self.assertTrue(ok) - self.assertEqual(probe_mock.call_count, 1) - - def test_hyprland_terminal_active_detection_uses_active_window_payload( - self, - ) -> None: - integration = HyprlandSystemIntegration() - with patch.object( - integration, - "active_window", - return_value={"class": "foot", "initialClass": "foot", "title": "dev"}, - ): - self.assertTrue(integration.is_terminal_window_active()) - - def test_hyprland_paste_shortcuts_terminal_and_default(self) -> None: - integration = HyprlandSystemIntegration() - self.assertEqual( - integration.paste_shortcuts(terminal_active=True), - (("CTRL SHIFT", "V"), ("SHIFT", "Insert"), ("CTRL", "V")), - ) - self.assertEqual( - integration.paste_shortcuts(terminal_active=False), - (("CTRL", "V"),), - ) - - def test_windows_paste_shortcuts_terminal_and_default(self) -> None: - integration = WindowsSystemIntegration() - self.assertEqual( - integration.paste_shortcuts(terminal_active=True), - (("CTRL SHIFT", "V"), ("SHIFT", "Insert"), ("CTRL", "V")), - ) - self.assertEqual( - integration.paste_shortcuts(terminal_active=False), - (("CTRL", "V"),), - ) - - def test_macos_paste_shortcuts_use_cmd_v(self) -> None: - integration = MacOSSystemIntegration() - self.assertEqual( - integration.paste_shortcuts(terminal_active=True), - (("CMD", "V"),), - ) - 
self.assertEqual( - integration.paste_shortcuts(terminal_active=False), - (("CMD", "V"),), - ) - - def test_terminal_payload_detection_by_title_hint(self) -> None: - payload = {"class": "Code", "initialClass": "Code", "title": "tmux"} - self.assertTrue(is_terminal_window_payload(payload)) - - def test_terminal_payload_detection_false_for_browser_window(self) -> None: - payload = { - "class": "chromium", - "initialClass": "chromium", - "title": "ChatGPT", - } - self.assertFalse(is_terminal_window_payload(payload)) - - def test_probe_text_input_focus_returns_true_when_script_outputs_one(self) -> None: - with patch( - "vibemouse.system_integration.subprocess.run", - return_value=SimpleNamespace(returncode=0, stdout="1\n"), - ): - self.assertTrue(probe_text_input_focus_via_atspi()) - - def test_probe_text_input_focus_timeout_returns_false(self) -> None: - with patch( - "vibemouse.system_integration.subprocess.run", - side_effect=subprocess.TimeoutExpired(cmd=["python3"], timeout=1.5), - ): - self.assertFalse(probe_text_input_focus_via_atspi()) - - def test_probe_send_enter_with_supplied_module_returns_true(self) -> None: - class _FakeKeySynthType: - PRESSRELEASE: object = object() - - class _FakeAtspi: - KeySynthType: type[_FakeKeySynthType] = _FakeKeySynthType - - @staticmethod - def generate_keyboard_event( - keyval: int, - keystring: str | None, - synth_type: object, - ) -> bool: - _ = keyval - _ = keystring - _ = synth_type - return True - - self.assertTrue( - probe_send_enter_via_atspi(atspi_module=_FakeAtspi(), lazy_load=False) - ) - - def test_probe_send_enter_without_module_returns_false(self) -> None: - self.assertFalse(probe_send_enter_via_atspi(atspi_module=None, lazy_load=False)) diff --git a/vibemouse/__init__.py b/vibemouse/__init__.py deleted file mode 100644 index 07c5de9..0000000 --- a/vibemouse/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -__all__ = ["__version__"] -__version__ = "0.1.0" diff --git a/vibemouse/app.py b/vibemouse/app.py deleted file mode 
100644 index 01f8ba9..0000000 --- a/vibemouse/app.py +++ /dev/null @@ -1,355 +0,0 @@ -from __future__ import annotations - -import json -import subprocess -import threading -from pathlib import Path -from typing import Literal - -from vibemouse.audio import AudioRecorder, AudioRecording -from vibemouse.config import AppConfig -from vibemouse.mouse_listener import SideButtonListener -from vibemouse.output import TextOutput -from vibemouse.system_integration import SystemIntegration, create_system_integration -from vibemouse.transcriber import SenseVoiceTranscriber - - -TranscriptionTarget = Literal["default", "openclaw"] - - -class VoiceMouseApp: - def __init__(self, config: AppConfig) -> None: - if config.front_button == config.rear_button: - raise ValueError("Front and rear side buttons must be different") - - self._config: AppConfig = config - self._system_integration: SystemIntegration = create_system_integration() - self._recorder: AudioRecorder = AudioRecorder( - sample_rate=config.sample_rate, - channels=config.channels, - dtype=config.dtype, - temp_dir=config.temp_dir, - ) - self._transcriber: SenseVoiceTranscriber = SenseVoiceTranscriber(config) - self._output: TextOutput = TextOutput( - system_integration=self._system_integration, - openclaw_command=config.openclaw_command, - openclaw_agent=config.openclaw_agent, - openclaw_timeout_s=config.openclaw_timeout_s, - openclaw_retries=config.openclaw_retries, - ) - self._listener: SideButtonListener = SideButtonListener( - on_front_press=self._on_front_press, - on_rear_press=self._on_rear_press, - on_gesture=self._on_gesture, - front_button=config.front_button, - rear_button=config.rear_button, - debounce_s=config.button_debounce_ms / 1000.0, - gestures_enabled=config.gestures_enabled, - gesture_trigger_button=config.gesture_trigger_button, - gesture_threshold_px=config.gesture_threshold_px, - gesture_freeze_pointer=config.gesture_freeze_pointer, - gesture_restore_cursor=config.gesture_restore_cursor, - 
system_integration=self._system_integration, - ) - self._stop_event: threading.Event = threading.Event() - self._transcribe_lock: threading.Lock = threading.Lock() - self._workers_lock: threading.Lock = threading.Lock() - self._workers: set[threading.Thread] = set() - self._prewarm_started: bool = False - - def run(self) -> None: - self._listener.start() - self._set_recording_status(False) - print( - "VibeMouse ready. " - + f"Model={self._config.model_name}, preferred_device={self._config.device}, " - + f"backend={self._config.transcriber_backend}, auto_paste={self._config.auto_paste}, " - + f"enter_mode={self._config.enter_mode}, debounce_ms={self._config.button_debounce_ms}, " - + f"front_button={self._config.front_button}, rear_button={self._config.rear_button}, " - + f"gestures_enabled={self._config.gestures_enabled}, " - + f"gesture_trigger={self._config.gesture_trigger_button}, " - + f"gesture_threshold_px={self._config.gesture_threshold_px}, " - + f"gesture_freeze_pointer={self._config.gesture_freeze_pointer}, " - + f"gesture_restore_cursor={self._config.gesture_restore_cursor}, " - + f"prewarm_on_start={self._config.prewarm_on_start}, " - + f"prewarm_delay_s={self._config.prewarm_delay_s}. " - + "Press side-front to start/stop recording. While recording, side-rear sends transcript to OpenClaw; otherwise side-rear sends Enter." 
- ) - self._maybe_prewarm_transcriber() - try: - _ = self._stop_event.wait() - except KeyboardInterrupt: - self._stop_event.set() - finally: - self.shutdown() - - def shutdown(self) -> None: - self._listener.stop() - self._recorder.cancel() - self._set_recording_status(False) - with self._workers_lock: - workers = list(self._workers) - still_running: list[threading.Thread] = [] - for worker in workers: - worker.join(timeout=5) - if worker.is_alive(): - still_running.append(worker) - if still_running: - print( - f"Shutdown warning: {len(still_running)} transcription worker(s) are still running" - ) - - def _on_front_press(self) -> None: - if not self._recorder.is_recording: - try: - self._recorder.start() - self._set_recording_status(True) - print("Recording started") - except Exception as error: - self._set_recording_status(False) - print(f"Failed to start recording: {error}") - return - - try: - recording = self._stop_recording() - except Exception as error: - print(f"Failed to stop recording: {error}") - return - - if recording is None: - return - - self._start_transcription_worker(recording, output_target="default") - - def _on_rear_press(self) -> None: - if self._recorder.is_recording: - try: - recording = self._stop_recording() - except Exception as error: - print(f"Failed to stop recording from rear button: {error}") - return - - if recording is None: - return - - print("Recording stopped by rear button, sending transcript to OpenClaw") - self._start_transcription_worker(recording, output_target="openclaw") - return - - try: - self._output.send_enter(mode=self._config.enter_mode) - if self._config.enter_mode == "none": - print("Enter key handling disabled (enter_mode=none)") - else: - print("Enter key sent") - except Exception as error: - print(f"Failed to send Enter: {error}") - - def _on_gesture(self, direction: str) -> None: - action = self._resolve_gesture_action(direction) - if action == "noop": - print(f"Gesture '{direction}' recognized with no action 
configured") - return - - if action == "record_toggle": - print(f"Gesture '{direction}' -> toggle recording") - self._on_front_press() - return - - if action == "send_enter": - mode = self._config.enter_mode - if mode == "none": - mode = "enter" - try: - self._output.send_enter(mode=mode) - print(f"Gesture '{direction}' -> send enter ({mode})") - except Exception as error: - print(f"Gesture '{direction}' failed to send enter: {error}") - return - - if action == "workspace_left": - if self._switch_workspace("left"): - print(f"Gesture '{direction}' -> switch workspace left") - else: - print(f"Gesture '{direction}' failed to switch workspace left") - return - - if action == "workspace_right": - if self._switch_workspace("right"): - print(f"Gesture '{direction}' -> switch workspace right") - else: - print(f"Gesture '{direction}' failed to switch workspace right") - return - - print(f"Gesture '{direction}' mapped to unknown action '{action}'") - - def _resolve_gesture_action(self, direction: str) -> str: - mapping = { - "up": self._config.gesture_up_action, - "down": self._config.gesture_down_action, - "left": self._config.gesture_left_action, - "right": self._config.gesture_right_action, - } - return mapping.get(direction, "noop") - - def _switch_workspace(self, direction: str) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - return bool(system_integration.switch_workspace(direction)) - except Exception: - return False - - workspace_arg = "e-1" if direction == "left" else "e+1" - try: - proc = subprocess.run( - ["hyprctl", "dispatch", "workspace", workspace_arg], - capture_output=True, - text=True, - check=False, - timeout=1.0, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "ok" - - def _stop_recording(self) -> AudioRecording | None: - try: - recording = 
self._recorder.stop_and_save() - except Exception as error: - self._set_recording_status(False) - raise RuntimeError(error) from error - - self._set_recording_status(False) - if recording is None: - print("Recording was empty and has been discarded") - return None - return recording - - def _start_transcription_worker( - self, - recording: AudioRecording, - *, - output_target: TranscriptionTarget, - ) -> None: - worker = threading.Thread( - target=self._transcribe_and_output, - args=(recording, output_target), - daemon=True, - ) - with self._workers_lock: - self._workers.add(worker) - worker.start() - - def _transcribe_and_output( - self, - recording: AudioRecording, - output_target: TranscriptionTarget, - ) -> None: - current = threading.current_thread() - try: - print(f"Recording stopped ({recording.duration_s:.1f}s), transcribing...") - with self._transcribe_lock: - text = self._transcriber.transcribe(recording.path) - - if not text: - print("No speech recognized") - return - - if output_target == "openclaw": - dispatch = self._output.send_to_openclaw_result(text) - route = dispatch.route - dispatch_reason = dispatch.reason - else: - route = self._output.inject_or_clipboard( - text, - auto_paste=self._config.auto_paste, - ) - dispatch_reason = "n/a" - - device = self._transcriber.device_in_use - backend = self._transcriber.backend_in_use - - if output_target == "openclaw": - if route == "openclaw": - print( - f"Transcribed with {backend} on {device}, sent to OpenClaw ({dispatch_reason})" - ) - elif route == "clipboard": - print( - f"Transcribed with {backend} on {device}, OpenClaw unavailable so copied to clipboard ({dispatch_reason})" - ) - else: - print( - f"Transcribed with {backend} on {device}, but OpenClaw output was empty ({dispatch_reason})" - ) - return - - if route == "typed": - print( - f"Transcribed with {backend} on {device}, typed into focused input" - ) - elif route == "pasted": - print( - f"Transcribed with {backend} on {device}, pasted via 
system shortcut" - ) - elif route == "clipboard": - print(f"Transcribed with {backend} on {device}, copied to clipboard") - else: - print(f"Transcribed with {backend} on {device}, but output was empty") - except Exception as error: - print(f"Transcription failed: {error}") - finally: - self._safe_unlink(recording.path) - with self._workers_lock: - self._workers.discard(current) - - def _safe_unlink(self, path: Path) -> None: - try: - path.unlink(missing_ok=True) - except Exception as error: - print(f"Failed to remove temp audio file {path}: {error}") - - def _maybe_prewarm_transcriber(self) -> None: - if not self._config.prewarm_on_start or self._prewarm_started: - return - self._prewarm_started = True - - worker = threading.Thread( - target=self._prewarm_transcriber, - args=(self._config.prewarm_delay_s,), - daemon=True, - ) - worker.start() - - def _prewarm_transcriber(self, delay_s: float = 0.0) -> None: - if delay_s > 0: - print(f"Transcriber prewarm scheduled in {delay_s:.1f}s") - if self._stop_event.wait(timeout=delay_s): - return - - try: - self._transcriber.prewarm() - print("Transcriber prewarm complete") - except Exception as error: - print(f"Transcriber prewarm skipped: {error}") - - def _set_recording_status(self, is_recording: bool) -> None: - payload = { - "recording": is_recording, - "state": "recording" if is_recording else "idle", - } - path = self._config.status_file - tmp_path = path.with_suffix(path.suffix + ".tmp") - try: - path.parent.mkdir(parents=True, exist_ok=True) - _ = tmp_path.write_text(json.dumps(payload), encoding="utf-8") - _ = tmp_path.replace(path) - except Exception: - return diff --git a/vibemouse/audio.py b/vibemouse/audio.py deleted file mode 100644 index 794eed1..0000000 --- a/vibemouse/audio.py +++ /dev/null @@ -1,157 +0,0 @@ -from __future__ import annotations - -import importlib -import threading -from collections.abc import Callable -from dataclasses import dataclass -from pathlib import Path -from typing import Protocol, 
cast -from uuid import uuid4 - -import numpy as np -from numpy.typing import NDArray - - -AudioFrame = NDArray[np.float32] - - -@dataclass -class AudioRecording: - path: Path - duration_s: float - - -class _AudioStream(Protocol): - def start(self) -> None: ... - - def stop(self) -> None: ... - - def close(self) -> None: ... - - -class _SoundDeviceModule(Protocol): - def InputStream( - self, - *, - samplerate: int, - channels: int, - dtype: str, - callback: Callable[[AudioFrame, int, object, object], None], - ) -> _AudioStream: ... - - -class _SoundFileModule(Protocol): - def write(self, file: str | Path, data: AudioFrame, samplerate: int) -> None: ... - - -class AudioRecorder: - def __init__( - self, sample_rate: int, channels: int, dtype: str, temp_dir: Path - ) -> None: - self._sample_rate: int = sample_rate - self._channels: int = channels - self._dtype: str = dtype - self._temp_dir: Path = temp_dir - self._sd: _SoundDeviceModule | None = None - self._sf: _SoundFileModule | None = None - self._lock: threading.Lock = threading.Lock() - self._frames: list[AudioFrame] = [] - self._stream: _AudioStream | None = None - self._recording: bool = False - - @property - def is_recording(self) -> bool: - with self._lock: - return self._recording - - def start(self) -> None: - self._ensure_audio_modules() - with self._lock: - if self._recording: - return - try: - self._temp_dir.mkdir(parents=True, exist_ok=True) - except OSError as error: - raise RuntimeError( - f"Failed to create temp audio directory {self._temp_dir}: {error}" - ) from error - self._frames = [] - if self._sd is None: - raise RuntimeError("Audio input module not initialized") - stream = self._sd.InputStream( - samplerate=self._sample_rate, - channels=self._channels, - dtype=self._dtype, - callback=self._callback, - ) - stream.start() - self._stream = stream - self._recording = True - - def stop_and_save(self) -> AudioRecording | None: - with self._lock: - if not self._recording: - return None - stream = 
self._stream - self._stream = None - self._recording = False - - if stream is not None: - stream.stop() - stream.close() - - with self._lock: - if not self._frames: - return None - audio = np.concatenate(self._frames, axis=0) - self._frames = [] - - out_path = self._temp_dir / f"recording_{uuid4().hex}.wav" - if self._sf is None: - raise RuntimeError("Audio write module not initialized") - try: - self._sf.write(out_path, audio, self._sample_rate) - except Exception as error: - raise RuntimeError( - f"Failed to write recording to {out_path}: {error}" - ) from error - duration = float(len(audio) / self._sample_rate) - return AudioRecording(path=out_path, duration_s=duration) - - def cancel(self) -> None: - with self._lock: - if not self._recording: - self._frames = [] - return - stream = self._stream - self._stream = None - self._recording = False - self._frames = [] - - if stream is not None: - stream.stop() - stream.close() - - def _callback( - self, indata: AudioFrame, frames: int, time_data: object, status: object - ) -> None: - del frames - del time_data - del status - with self._lock: - if self._recording: - self._frames.append(indata.copy()) - - def _ensure_audio_modules(self) -> None: - if self._sd is not None and self._sf is not None: - return - try: - sounddevice_module = importlib.import_module("sounddevice") - soundfile_module = importlib.import_module("soundfile") - except Exception as error: - raise RuntimeError( - "Audio dependencies missing. Install sounddevice and soundfile." 
- ) from error - - self._sd = cast(_SoundDeviceModule, cast(object, sounddevice_module)) - self._sf = cast(_SoundFileModule, cast(object, soundfile_module)) diff --git a/vibemouse/config.py b/vibemouse/config.py deleted file mode 100644 index 8f46307..0000000 --- a/vibemouse/config.py +++ /dev/null @@ -1,243 +0,0 @@ -from __future__ import annotations - -import os -import tempfile -from dataclasses import dataclass -from pathlib import Path - - -def _read_bool(name: str, default: bool) -> bool: - raw = os.getenv(name) - if raw is None: - return default - return raw.strip().lower() in {"1", "true", "yes", "on"} - - -def _read_int(name: str, default: int) -> int: - raw = os.getenv(name) - if raw is None: - return default - try: - return int(raw.strip()) - except ValueError as error: - raise ValueError(f"{name} must be an integer, got {raw!r}") from error - - -def _read_float(name: str, default: float) -> float: - raw = os.getenv(name) - if raw is None: - return default - try: - return float(raw.strip()) - except ValueError as error: - raise ValueError(f"{name} must be a float, got {raw!r}") from error - - -def _read_button(name: str, default: str) -> str: - value = os.getenv(name, default).strip().lower() - if value not in {"x1", "x2"}: - raise ValueError(f"{name} must be either 'x1' or 'x2', got {value!r}") - return value - - -def _require_positive(name: str, value: int) -> int: - if value <= 0: - raise ValueError(f"{name} must be a positive integer, got {value}") - return value - - -def _require_non_negative(name: str, value: int) -> int: - if value < 0: - raise ValueError(f"{name} must be a non-negative integer, got {value}") - return value - - -def _require_positive_float(name: str, value: float) -> float: - if value <= 0: - raise ValueError(f"{name} must be a positive float, got {value}") - return value - - -def _require_non_negative_float(name: str, value: float) -> float: - if value < 0: - raise ValueError(f"{name} must be a non-negative float, got {value}") - 
return value - - -def _read_choice(name: str, default: str, allowed: set[str]) -> str: - value = os.getenv(name, default).strip().lower() - if value not in allowed: - options = ", ".join(sorted(allowed)) - raise ValueError(f"{name} must be one of: {options}; got {value!r}") - return value - - -@dataclass(frozen=True) -class AppConfig: - sample_rate: int - channels: int - dtype: str - transcriber_backend: str - model_name: str - device: str - language: str - use_itn: bool - enable_vad: bool - vad_max_single_segment_ms: int - merge_vad: bool - merge_length_s: int - fallback_to_cpu: bool - button_debounce_ms: int - gestures_enabled: bool - gesture_trigger_button: str - gesture_threshold_px: int - gesture_freeze_pointer: bool - gesture_restore_cursor: bool - gesture_up_action: str - gesture_down_action: str - gesture_left_action: str - gesture_right_action: str - enter_mode: str - auto_paste: bool - trust_remote_code: bool - prewarm_on_start: bool - prewarm_delay_s: float - status_file: Path - openclaw_command: str - openclaw_agent: str | None - openclaw_timeout_s: float - openclaw_retries: int - front_button: str - rear_button: str - temp_dir: Path - - -def load_config() -> AppConfig: - temp_dir = Path( - os.getenv("VIBEMOUSE_TEMP_DIR", str(Path(tempfile.gettempdir()) / "vibemouse")) - ) - runtime_dir = Path(os.getenv("XDG_RUNTIME_DIR", tempfile.gettempdir())) - status_file = Path( - os.getenv("VIBEMOUSE_STATUS_FILE", str(runtime_dir / "vibemouse-status.json")) - ) - - sample_rate = _require_positive( - "VIBEMOUSE_SAMPLE_RATE", _read_int("VIBEMOUSE_SAMPLE_RATE", 16000) - ) - channels = _require_positive( - "VIBEMOUSE_CHANNELS", _read_int("VIBEMOUSE_CHANNELS", 1) - ) - vad_max_segment_ms = _require_positive( - "VIBEMOUSE_VAD_MAX_SEGMENT_MS", _read_int("VIBEMOUSE_VAD_MAX_SEGMENT_MS", 30000) - ) - merge_length_s = _require_positive( - "VIBEMOUSE_MERGE_LENGTH_S", _read_int("VIBEMOUSE_MERGE_LENGTH_S", 15) - ) - front_button = _read_button("VIBEMOUSE_FRONT_BUTTON", "x1") - 
rear_button = _read_button("VIBEMOUSE_REAR_BUTTON", "x2") - if front_button == rear_button: - raise ValueError("VIBEMOUSE_FRONT_BUTTON and VIBEMOUSE_REAR_BUTTON must differ") - button_debounce_ms = _require_non_negative( - "VIBEMOUSE_BUTTON_DEBOUNCE_MS", - _read_int("VIBEMOUSE_BUTTON_DEBOUNCE_MS", 150), - ) - gestures_enabled = _read_bool("VIBEMOUSE_GESTURES_ENABLED", False) - gesture_trigger_button = _read_choice( - "VIBEMOUSE_GESTURE_TRIGGER_BUTTON", - "rear", - {"front", "rear", "right"}, - ) - gesture_threshold_px = _require_positive( - "VIBEMOUSE_GESTURE_THRESHOLD_PX", - _read_int("VIBEMOUSE_GESTURE_THRESHOLD_PX", 120), - ) - gesture_freeze_pointer = _read_bool("VIBEMOUSE_GESTURE_FREEZE_POINTER", True) - gesture_restore_cursor = _read_bool("VIBEMOUSE_GESTURE_RESTORE_CURSOR", True) - gesture_actions = { - "record_toggle", - "send_enter", - "workspace_left", - "workspace_right", - "noop", - } - gesture_up_action = _read_choice( - "VIBEMOUSE_GESTURE_UP_ACTION", - "record_toggle", - gesture_actions, - ) - gesture_down_action = _read_choice( - "VIBEMOUSE_GESTURE_DOWN_ACTION", - "noop", - gesture_actions, - ) - gesture_left_action = _read_choice( - "VIBEMOUSE_GESTURE_LEFT_ACTION", - "noop", - gesture_actions, - ) - gesture_right_action = _read_choice( - "VIBEMOUSE_GESTURE_RIGHT_ACTION", - "send_enter", - gesture_actions, - ) - enter_mode = _read_choice( - "VIBEMOUSE_ENTER_MODE", - "enter", - {"enter", "ctrl_enter", "shift_enter", "none"}, - ) - openclaw_command = os.getenv("VIBEMOUSE_OPENCLAW_COMMAND", "openclaw").strip() - if not openclaw_command: - raise ValueError("VIBEMOUSE_OPENCLAW_COMMAND must not be empty") - openclaw_agent_raw = os.getenv("VIBEMOUSE_OPENCLAW_AGENT", "main").strip() - openclaw_agent = openclaw_agent_raw if openclaw_agent_raw else None - openclaw_timeout_s = _require_positive_float( - "VIBEMOUSE_OPENCLAW_TIMEOUT_S", - _read_float("VIBEMOUSE_OPENCLAW_TIMEOUT_S", 20.0), - ) - openclaw_retries = _require_non_negative( - 
"VIBEMOUSE_OPENCLAW_RETRIES", - _read_int("VIBEMOUSE_OPENCLAW_RETRIES", 0), - ) - prewarm_delay_s = _require_non_negative_float( - "VIBEMOUSE_PREWARM_DELAY_S", - _read_float("VIBEMOUSE_PREWARM_DELAY_S", 0.0), - ) - - return AppConfig( - sample_rate=sample_rate, - channels=channels, - dtype=os.getenv("VIBEMOUSE_DTYPE", "float32"), - transcriber_backend=os.getenv("VIBEMOUSE_BACKEND", "auto").strip().lower(), - model_name=os.getenv("VIBEMOUSE_MODEL", "iic/SenseVoiceSmall"), - device=os.getenv("VIBEMOUSE_DEVICE", "cpu"), - language=os.getenv("VIBEMOUSE_LANGUAGE", "auto"), - use_itn=_read_bool("VIBEMOUSE_USE_ITN", True), - enable_vad=_read_bool("VIBEMOUSE_ENABLE_VAD", True), - vad_max_single_segment_ms=vad_max_segment_ms, - merge_vad=_read_bool("VIBEMOUSE_MERGE_VAD", True), - merge_length_s=merge_length_s, - fallback_to_cpu=_read_bool("VIBEMOUSE_FALLBACK_CPU", True), - button_debounce_ms=button_debounce_ms, - gestures_enabled=gestures_enabled, - gesture_trigger_button=gesture_trigger_button, - gesture_threshold_px=gesture_threshold_px, - gesture_freeze_pointer=gesture_freeze_pointer, - gesture_restore_cursor=gesture_restore_cursor, - gesture_up_action=gesture_up_action, - gesture_down_action=gesture_down_action, - gesture_left_action=gesture_left_action, - gesture_right_action=gesture_right_action, - enter_mode=enter_mode, - auto_paste=_read_bool("VIBEMOUSE_AUTO_PASTE", False), - trust_remote_code=_read_bool("VIBEMOUSE_TRUST_REMOTE_CODE", False), - prewarm_on_start=_read_bool("VIBEMOUSE_PREWARM_ON_START", True), - prewarm_delay_s=prewarm_delay_s, - status_file=status_file, - openclaw_command=openclaw_command, - openclaw_agent=openclaw_agent, - openclaw_timeout_s=openclaw_timeout_s, - openclaw_retries=openclaw_retries, - front_button=front_button, - rear_button=rear_button, - temp_dir=temp_dir, - ) diff --git a/vibemouse/deploy.py b/vibemouse/deploy.py deleted file mode 100644 index d04b5be..0000000 --- a/vibemouse/deploy.py +++ /dev/null @@ -1,255 +0,0 @@ -from 
__future__ import annotations - -import argparse -import shlex -import shutil -import subprocess -import sys -from pathlib import Path -from typing import cast - -from vibemouse.doctor import run_doctor - - -_PRESET_OVERRIDES: dict[str, dict[str, str]] = { - "stable": { - "VIBEMOUSE_AUTO_PASTE": "true", - "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "220", - "VIBEMOUSE_PREWARM_ON_START": "true", - "VIBEMOUSE_OPENCLAW_RETRIES": "1", - }, - "fast": { - "VIBEMOUSE_AUTO_PASTE": "true", - "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "120", - "VIBEMOUSE_PREWARM_ON_START": "true", - "VIBEMOUSE_OPENCLAW_RETRIES": "2", - }, - "low-resource": { - "VIBEMOUSE_AUTO_PASTE": "false", - "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "250", - "VIBEMOUSE_PREWARM_ON_START": "false", - "VIBEMOUSE_OPENCLAW_RETRIES": "0", - }, -} - - -def configure_deploy_parser(parser: argparse.ArgumentParser) -> None: - _ = parser.add_argument( - "--preset", - choices=sorted(_PRESET_OVERRIDES.keys()), - default="stable", - help="deployment preset profile", - ) - _ = parser.add_argument( - "--env-file", - default=str(Path.home() / ".config" / "vibemouse" / "deploy.env"), - help="path to generated EnvironmentFile", - ) - _ = parser.add_argument( - "--service-file", - default=str(Path.home() / ".config" / "systemd" / "user" / "vibemouse.service"), - help="path to generated systemd user service file", - ) - _ = parser.add_argument( - "--openclaw-command", - default=shutil.which("openclaw") or "openclaw", - help="OpenClaw command prefix", - ) - _ = parser.add_argument( - "--openclaw-agent", - default="main", - help="OpenClaw agent id used for rear-button routing", - ) - _ = parser.add_argument( - "--openclaw-retries", - type=int, - default=None, - help="override retries for OpenClaw spawn failures", - ) - _ = parser.add_argument( - "--exec-start", - default=None, - help="override ExecStart command", - ) - _ = parser.add_argument( - "--skip-systemctl", - action="store_true", - help="skip systemctl enable/restart operations", - ) - _ = 
parser.add_argument( - "--dry-run", - action="store_true", - help="print plan without writing files", - ) - - -def run_deploy(args: argparse.Namespace) -> int: - preset = str(getattr(args, "preset", "stable")) - if preset not in _PRESET_OVERRIDES: - print(f"Unknown preset: {preset}") - return 1 - - openclaw_command = str(getattr(args, "openclaw_command", "openclaw")).strip() - if not openclaw_command: - print("--openclaw-command must not be empty") - return 1 - - openclaw_agent = str(getattr(args, "openclaw_agent", "main")).strip() or "main" - - retries_override = cast(int | None, getattr(args, "openclaw_retries", None)) - - if retries_override is not None and retries_override < 0: - print("--openclaw-retries must be non-negative") - return 1 - - env_path = Path(str(getattr(args, "env_file", ""))).expanduser() - service_path = Path(str(getattr(args, "service_file", ""))).expanduser() - exec_start = _resolve_exec_start(str(getattr(args, "exec_start", "") or "")) - - env_map = build_deploy_env( - preset=preset, - openclaw_command=openclaw_command, - openclaw_agent=openclaw_agent, - openclaw_retries=retries_override, - ) - env_content = render_env_file(env_map) - service_content = render_service_file( - env_file=env_path, - exec_start=exec_start, - ) - - dry_run = bool(getattr(args, "dry_run", False)) - if dry_run: - print(f"[DRY-RUN] would write {env_path}") - print(f"[DRY-RUN] would write {service_path}") - print(f"[DRY-RUN] preset={preset}") - print(f"[DRY-RUN] exec_start={exec_start}") - return 0 - - _write_text(env_path, env_content) - _write_text(service_path, service_content) - print(f"Wrote {env_path}") - print(f"Wrote {service_path}") - - if not bool(getattr(args, "skip_systemctl", False)): - service_name = service_path.name - if not _run_systemctl(["daemon-reload"]): - return 1 - if not _run_systemctl(["enable", "--now", service_name]): - return 1 - if not _run_systemctl(["is-active", service_name]): - return 1 - - print("Running doctor checks...") - return 
run_doctor() - - -def build_deploy_env( - *, - preset: str, - openclaw_command: str, - openclaw_agent: str, - openclaw_retries: int | None, -) -> dict[str, str]: - base = { - "VIBEMOUSE_BACKEND": "auto", - "VIBEMOUSE_DEVICE": "cpu", - "VIBEMOUSE_FALLBACK_CPU": "true", - "VIBEMOUSE_ENTER_MODE": "enter", - "VIBEMOUSE_OPENCLAW_COMMAND": openclaw_command, - "VIBEMOUSE_OPENCLAW_AGENT": openclaw_agent, - "VIBEMOUSE_OPENCLAW_TIMEOUT_S": "20.0", - "VIBEMOUSE_STATUS_FILE": "%t/vibemouse-status.json", - } - base.update(_PRESET_OVERRIDES[preset]) - if openclaw_retries is not None: - base["VIBEMOUSE_OPENCLAW_RETRIES"] = str(openclaw_retries) - return base - - -def render_env_file(env_map: dict[str, str]) -> str: - lines = [ - "# Generated by `vibemouse deploy`.", - "# Edit values if needed, then: systemctl --user restart vibemouse.service", - ] - for key in sorted(env_map.keys()): - lines.append(f"{key}={_quote_env_value(env_map[key])}") - lines.append("") - return "\n".join(lines) - - -def render_service_file(*, env_file: Path, exec_start: str) -> str: - lines = [ - "[Unit]", - "Description=VibeMouse voice input service", - "After=graphical-session.target", - "PartOf=graphical-session.target", - "", - "[Service]", - "Type=simple", - f"EnvironmentFile={env_file}", - f"ExecStart={exec_start}", - "Restart=on-failure", - "RestartSec=2", - "", - "[Install]", - "WantedBy=default.target", - "", - ] - return "\n".join(lines) - - -def _quote_env_value(value: str) -> str: - escaped = value.replace("\\", "\\\\").replace('"', '\\"') - return f'"{escaped}"' - - -def _resolve_exec_start(raw_exec_start: str) -> str: - cleaned = raw_exec_start.strip() - if cleaned: - return cleaned - - vibemouse_bin = shutil.which("vibemouse") - if vibemouse_bin: - return f"{vibemouse_bin} run" - - python_bin = sys.executable - return f"{python_bin} -m vibemouse.main run" - - -def _write_text(path: Path, content: str) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - _ = path.write_text(content, 
encoding="utf-8") - - -def _run_systemctl(args: list[str]) -> bool: - cmd = ["systemctl", "--user", *args] - try: - proc = subprocess.run( - cmd, - capture_output=True, - text=True, - check=False, - timeout=12.0, - ) - except (OSError, subprocess.TimeoutExpired) as error: - print(f"Failed to run {' '.join(cmd)}: {error}") - return False - - if proc.returncode == 0: - return True - - stderr = proc.stderr.strip() - if stderr: - print(f"systemctl {' '.join(args)} failed: {stderr}") - else: - print(f"systemctl {' '.join(args)} failed with code {proc.returncode}") - return False - - -def validate_openclaw_command(raw: str) -> bool: - try: - parts = shlex.split(raw) - except ValueError: - return False - return bool(parts) diff --git a/vibemouse/doctor.py b/vibemouse/doctor.py deleted file mode 100644 index 3cdbac1..0000000 --- a/vibemouse/doctor.py +++ /dev/null @@ -1,610 +0,0 @@ -from __future__ import annotations - -import importlib -import json -import shlex -import shutil -import subprocess -import sys -from collections.abc import Iterable, Mapping -from dataclasses import dataclass -from pathlib import Path - -from vibemouse.config import AppConfig, load_config - - -@dataclass(frozen=True) -class DoctorCheck: - name: str - status: str - detail: str - - -def run_doctor(*, apply_fixes: bool = False) -> int: - if apply_fixes: - _apply_doctor_fixes() - - checks: list[DoctorCheck] = [] - - config_check, config = _check_config_load() - checks.append(config_check) - - if config is not None: - checks.extend(_check_openclaw(config)) - - checks.append(_check_audio_input(config)) - checks.append(_check_input_device_permissions(config)) - - checks.append(_check_hyprland_return_bind_conflict(config)) - checks.append(_check_user_service_state()) - - _print_checks(checks) - - fail_count = sum(1 for check in checks if check.status == "fail") - warn_count = sum(1 for check in checks if check.status == "warn") - print(f"Doctor summary: {len(checks)} checks, {fail_count} fail, 
{warn_count} warn") - return 1 if fail_count else 0 - - -def _apply_doctor_fixes() -> None: - _fix_hyprland_return_bind_conflict() - _ensure_user_service_active() - - -def _fix_hyprland_return_bind_conflict() -> None: - bind_path = Path.home() / ".config/hypr/UserConfigs/UserKeybinds.conf" - if not bind_path.exists(): - return - - try: - lines = bind_path.read_text(encoding="utf-8", errors="ignore").splitlines() - except OSError: - return - - changed = False - rewritten: list[str] = [] - for line in lines: - stripped = line.strip() - if ( - stripped.startswith("#") - or "sendshortcut" not in stripped - or "Return" not in stripped - ): - rewritten.append(line) - continue - - if "mouse:275" in stripped or "mouse:276" in stripped: - rewritten.append(f"# {line} # auto-disabled by vibemouse doctor --fix") - changed = True - continue - - rewritten.append(line) - - if not changed: - return - - try: - bind_path.write_text("\n".join(rewritten) + "\n", encoding="utf-8") - except OSError: - return - - _ = _run_subprocess( - ["hyprctl", "reload", "config-only"], - timeout=3.0, - ) - - -def _ensure_user_service_active() -> None: - probe = _run_subprocess( - ["systemctl", "--user", "is-active", "vibemouse.service"], - timeout=3.0, - ) - if probe is None: - return - if probe.returncode == 0 and probe.stdout.strip() == "active": - return - - _ = _run_subprocess( - ["systemctl", "--user", "restart", "vibemouse.service"], - timeout=8.0, - ) - - -def _check_config_load() -> tuple[DoctorCheck, AppConfig | None]: - try: - config = load_config() - except Exception as error: - return ( - DoctorCheck( - name="config", - status="fail", - detail=f"failed to load config: {error}", - ), - None, - ) - - return ( - DoctorCheck( - name="config", - status="ok", - detail=( - "loaded " - + f"front={config.front_button}, rear={config.rear_button}, " - + f"openclaw_agent={config.openclaw_agent or 'none'}" - ), - ), - config, - ) - - -def _check_openclaw(config: AppConfig) -> list[DoctorCheck]: - 
checks: list[DoctorCheck] = [] - - command_parts = _parse_openclaw_command(config.openclaw_command) - if command_parts is None: - checks.append( - DoctorCheck( - name="openclaw-command", - status="fail", - detail="invalid VIBEMOUSE_OPENCLAW_COMMAND shell syntax", - ) - ) - return checks - - executable = command_parts[0] - resolved = shutil.which(executable) - if resolved is None: - checks.append( - DoctorCheck( - name="openclaw-command", - status="fail", - detail=f"executable not found in PATH: {executable}", - ) - ) - return checks - - checks.append( - DoctorCheck( - name="openclaw-command", - status="ok", - detail=f"resolved executable: {resolved}", - ) - ) - - configured_agent = config.openclaw_agent - if not configured_agent: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="no agent configured; set VIBEMOUSE_OPENCLAW_AGENT", - ) - ) - return checks - - probe_cmd = [*command_parts, "agents", "list", "--json"] - try: - probe = subprocess.run( - probe_cmd, - capture_output=True, - text=True, - check=False, - timeout=8.0, - ) - except subprocess.TimeoutExpired: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="timed out while probing available agents", - ) - ) - return checks - except OSError as error: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail=f"failed to run agent probe: {error}", - ) - ) - return checks - - if probe.returncode != 0: - stderr = probe.stderr.strip() - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail=( - "agent probe failed" - if not stderr - else f"agent probe failed: {stderr}" - ), - ) - ) - return checks - - try: - payload = json.loads(probe.stdout) - except json.JSONDecodeError: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="agent probe returned invalid JSON", - ) - ) - return checks - - if not isinstance(payload, list): - checks.append( - DoctorCheck( - 
name="openclaw-agent", - status="warn", - detail="agent probe returned unexpected payload shape", - ) - ) - return checks - - available_agents = { - str(entry.get("id", "")).strip() for entry in payload if isinstance(entry, dict) - } - if configured_agent in available_agents: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="ok", - detail=f"configured agent exists: {configured_agent}", - ) - ) - else: - sample = ", ".join(sorted(agent for agent in available_agents if agent)[:5]) - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail=( - f"configured agent not found: {configured_agent}; " - + (f"available: {sample}" if sample else "no agents listed") - ), - ) - ) - - return checks - - -def _check_audio_input(config: AppConfig | None) -> DoctorCheck: - try: - sounddevice = importlib.import_module("sounddevice") - except Exception as error: - return DoctorCheck( - name="audio-input", - status="fail", - detail=f"cannot import sounddevice: {error}", - ) - - query_devices = getattr(sounddevice, "query_devices", None) - if not callable(query_devices): - return DoctorCheck( - name="audio-input", - status="fail", - detail="sounddevice.query_devices is unavailable", - ) - - try: - devices_obj = query_devices() - except Exception as error: - return DoctorCheck( - name="audio-input", - status="fail", - detail=f"failed to query audio devices: {error}", - ) - - device_entries = _coerce_device_entries(devices_obj) - if device_entries is None: - return DoctorCheck( - name="audio-input", - status="warn", - detail="unexpected audio device payload shape", - ) - - input_devices: list[Mapping[str, object]] = [] - for item in device_entries: - max_inputs = _to_float(item.get("max_input_channels", 0.0)) - if max_inputs > 0: - input_devices.append(item) - if not input_devices: - return DoctorCheck( - name="audio-input", - status="fail", - detail="no input-capable microphone device detected", - ) - - default_index = 
_read_default_input_device_index(sounddevice) - check_input_settings = getattr(sounddevice, "check_input_settings", None) - if default_index is not None and callable(check_input_settings): - sample_rate = float(config.sample_rate) if config is not None else 16000.0 - channels = config.channels if config is not None else 1 - try: - _ = check_input_settings( - device=default_index, - channels=max(1, int(channels)), - samplerate=sample_rate, - ) - except Exception as error: - return DoctorCheck( - name="audio-input", - status="warn", - detail=f"default input exists but validation failed: {error}", - ) - - return DoctorCheck( - name="audio-input", - status="ok", - detail=f"detected {len(input_devices)} input-capable device(s)", - ) - - -def _check_input_device_permissions(config: AppConfig | None) -> DoctorCheck: - if not sys.platform.startswith("linux"): - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="raw input permission check is only available on Linux", - ) - - try: - evdev_module = importlib.import_module("evdev") - except Exception as error: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail=f"cannot import evdev for raw input check: {error}", - ) - - list_devices = getattr(evdev_module, "list_devices", None) - input_device_ctor = getattr(evdev_module, "InputDevice", None) - ecodes = getattr(evdev_module, "ecodes", None) - if not callable(list_devices) or input_device_ctor is None or ecodes is None: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="evdev module is missing required APIs", - ) - - try: - device_paths_obj = list_devices() - except Exception as error: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail=f"failed to list /dev/input devices: {error}", - ) - - if not isinstance(device_paths_obj, list): - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="unexpected device-path payload from 
evdev", - ) - - device_paths = [str(path) for path in device_paths_obj] - if not device_paths: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="no /dev/input/event* devices were found", - ) - - ev_key = int(getattr(ecodes, "EV_KEY", 1)) - btn_side = int(getattr(ecodes, "BTN_SIDE", 0x116)) - btn_extra = int(getattr(ecodes, "BTN_EXTRA", 0x117)) - side_button_codes = {btn_side, btn_extra} - - accessible = 0 - side_capable = 0 - permission_denied = 0 - - for path in device_paths: - try: - device = input_device_ctor(path) - except PermissionError: - permission_denied += 1 - continue - except Exception: - continue - - try: - capabilities_obj = device.capabilities() - accessible += 1 - if isinstance(capabilities_obj, dict): - keys_obj = capabilities_obj.get(ev_key, []) - keys = {int(code) for code in keys_obj if isinstance(code, int)} - if side_button_codes & keys: - side_capable += 1 - finally: - try: - device.close() - except Exception: - pass - - if accessible == 0 and permission_denied > 0: - return DoctorCheck( - name="input-device-permissions", - status="fail", - detail=( - "cannot access /dev/input event devices (permission denied); " - + "add user to input group or configure udev rules" - ), - ) - - if accessible == 0: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="no readable /dev/input event devices were found", - ) - - rear_button = config.rear_button if config is not None else "x2" - if side_capable == 0: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail=( - f"{accessible} input device(s) readable but none expose side-button codes " - + f"for rear={rear_button}" - ), - ) - - return DoctorCheck( - name="input-device-permissions", - status="ok", - detail=( - f"{accessible} readable input device(s), " - + f"{side_capable} with side-button capability" - ), - ) - - -def _read_default_input_device_index(sounddevice: object) -> int | None: - default_obj = 
getattr(sounddevice, "default", None) - if default_obj is None: - return None - - device_attr = getattr(default_obj, "device", None) - if not isinstance(device_attr, tuple | list) or len(device_attr) < 1: - return None - - raw_input_index = device_attr[0] - if not isinstance(raw_input_index, int): - return None - if raw_input_index < 0: - return None - return raw_input_index - - -def _coerce_device_entries(devices_obj: object) -> list[Mapping[str, object]] | None: - if isinstance(devices_obj, list): - return [entry for entry in devices_obj if isinstance(entry, Mapping)] - - if isinstance(devices_obj, Iterable): - entries: list[Mapping[str, object]] = [] - for entry in devices_obj: - if isinstance(entry, Mapping): - entries.append(entry) - return entries - - return None - - -def _to_float(value: object) -> float: - if isinstance(value, int | float): - return float(value) - if isinstance(value, str): - try: - return float(value.strip()) - except ValueError: - return 0.0 - return 0.0 - - -def _check_hyprland_return_bind_conflict(config: AppConfig | None) -> DoctorCheck: - bind_path = Path.home() / ".config/hypr/UserConfigs/UserKeybinds.conf" - if not bind_path.exists(): - return DoctorCheck( - name="hyprland-bind-conflict", - status="warn", - detail=f"file not found: {bind_path}", - ) - - rear_button = config.rear_button if config is not None else "x2" - rear_mouse_code = "mouse:275" if rear_button == "x1" else "mouse:276" - - lines = bind_path.read_text(encoding="utf-8", errors="ignore").splitlines() - for idx, raw_line in enumerate(lines, start=1): - line = raw_line.strip() - if not line or line.startswith("#"): - continue - if rear_mouse_code in line and "sendshortcut" in line and "Return" in line: - return DoctorCheck( - name="hyprland-bind-conflict", - status="fail", - detail=( - f"conflicting return bind found at {bind_path}:{idx}; " - + "disable it to let VibeMouse control rear-button behavior" - ), - ) - - return DoctorCheck( - name="hyprland-bind-conflict", - 
status="ok", - detail=f"no conflicting {rear_mouse_code} return bind found", - ) - - -def _check_user_service_state() -> DoctorCheck: - probe = _run_subprocess( - ["systemctl", "--user", "is-active", "vibemouse.service"], - timeout=3.0, - ) - if probe is None: - return DoctorCheck( - name="user-service", - status="warn", - detail="could not query service state", - ) - - state = probe.stdout.strip() or "unknown" - if state == "active": - return DoctorCheck( - name="user-service", - status="ok", - detail="vibemouse.service is active", - ) - - return DoctorCheck( - name="user-service", - status="warn", - detail=f"vibemouse.service state is {state}", - ) - - -def _run_subprocess( - cmd: list[str], - *, - timeout: float, -) -> subprocess.CompletedProcess[str] | None: - try: - return subprocess.run( - cmd, - capture_output=True, - text=True, - check=False, - timeout=timeout, - ) - except (OSError, subprocess.TimeoutExpired): - return None - - -def _parse_openclaw_command(raw: str) -> list[str] | None: - cleaned = raw.strip() - if not cleaned: - return None - try: - parts = shlex.split(cleaned) - except ValueError: - return None - if not parts: - return None - return parts - - -def _print_checks(checks: list[DoctorCheck]) -> None: - for check in checks: - badge = { - "ok": "[OK]", - "warn": "[WARN]", - "fail": "[FAIL]", - }.get(check.status, "[INFO]") - print(f"{badge} {check.name}: {check.detail}") diff --git a/vibemouse/main.py b/vibemouse/main.py deleted file mode 100644 index 839419c..0000000 --- a/vibemouse/main.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import argparse - -from vibemouse.app import VoiceMouseApp -from vibemouse.config import load_config -from vibemouse.deploy import configure_deploy_parser, run_deploy -from vibemouse.doctor import run_doctor - - -def _build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(prog="vibemouse") - subparsers = parser.add_subparsers(dest="command") - _ = 
subparsers.add_parser("run", help="run the voice-input daemon") - doctor_parser = subparsers.add_parser("doctor", help="run environment diagnostics") - _ = doctor_parser.add_argument( - "--fix", - action="store_true", - help="apply safe auto-remediations before running checks", - ) - deploy_parser = subparsers.add_parser( - "deploy", - help="generate service/env files and deploy as user service", - ) - configure_deploy_parser(deploy_parser) - return parser - - -def main(argv: list[str] | None = None) -> int: - parser = _build_parser() - args = parser.parse_args(argv) - - raw_command = getattr(args, "command", None) - command = raw_command if isinstance(raw_command, str) else "run" - if command == "doctor": - apply_fixes_raw = getattr(args, "fix", False) - apply_fixes = bool(apply_fixes_raw) - return run_doctor(apply_fixes=apply_fixes) - if command == "deploy": - return run_deploy(args) - - config = load_config() - app = VoiceMouseApp(config) - app.run() - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/vibemouse/mouse_listener.py b/vibemouse/mouse_listener.py deleted file mode 100644 index 0d2d950..0000000 --- a/vibemouse/mouse_listener.py +++ /dev/null @@ -1,511 +0,0 @@ -from __future__ import annotations - -import importlib -import json -import subprocess -import threading -import time -from collections.abc import Callable -from typing import Protocol, cast - -from vibemouse.system_integration import SystemIntegration, create_system_integration - - -ButtonCallback = Callable[[], None] -GestureCallback = Callable[[str], None] - - -class SideButtonListener: - def __init__( - self, - on_front_press: ButtonCallback, - on_rear_press: ButtonCallback, - front_button: str, - rear_button: str, - debounce_s: float = 0.15, - on_gesture: GestureCallback | None = None, - gestures_enabled: bool = False, - gesture_trigger_button: str = "rear", - gesture_threshold_px: int = 120, - gesture_freeze_pointer: bool = True, - gesture_restore_cursor: 
bool = True, - system_integration: SystemIntegration | None = None, - ) -> None: - if gesture_trigger_button not in {"front", "rear", "right"}: - raise ValueError( - "gesture_trigger_button must be one of: front, rear, right" - ) - self._on_front_press: ButtonCallback = on_front_press - self._on_rear_press: ButtonCallback = on_rear_press - self._on_gesture: GestureCallback | None = on_gesture - self._front_button: str = front_button - self._rear_button: str = rear_button - self._debounce_s: float = max(0.0, debounce_s) - self._gestures_enabled: bool = gestures_enabled - self._gesture_trigger_button: str = gesture_trigger_button - self._gesture_threshold_px: int = max(1, gesture_threshold_px) - self._gesture_freeze_pointer: bool = gesture_freeze_pointer - self._gesture_restore_cursor: bool = gesture_restore_cursor - self._system_integration: SystemIntegration = ( - system_integration - if system_integration is not None - else create_system_integration() - ) - self._hyprland_session: bool = self._system_integration.is_hyprland - self._last_front_press_monotonic: float = 0.0 - self._last_rear_press_monotonic: float = 0.0 - self._debounce_lock: threading.Lock = threading.Lock() - self._gesture_lock: threading.Lock = threading.Lock() - self._gesture_active: bool = False - self._gesture_dx: int = 0 - self._gesture_dy: int = 0 - self._gesture_last_position: tuple[int, int] | None = None - self._gesture_anchor_cursor: tuple[int, int] | None = None - self._gesture_grabbed_device: _EvdevDevice | None = None - self._stop: threading.Event = threading.Event() - self._thread: threading.Thread | None = None - - def start(self) -> None: - if self._thread is not None and self._thread.is_alive(): - return - self._stop.clear() - self._thread = threading.Thread(target=self._run, daemon=True) - self._thread.start() - - def stop(self) -> None: - self._stop.set() - self._release_gesture_grab() - if self._thread is not None: - self._thread.join(timeout=2) - - def _run(self) -> None: - 
last_error_summary: str | None = None - while not self._stop.is_set(): - try: - self._run_evdev() - return - except Exception as evdev_error: - try: - self._run_pynput() - return - except Exception as pynput_error: - summary = ( - "Mouse listener backends unavailable " - + f"(evdev: {evdev_error}; pynput: {pynput_error}). Retrying..." - ) - if summary != last_error_summary: - print(summary) - last_error_summary = summary - if self._stop.wait(1.0): - return - - def _run_evdev(self) -> None: - import select - - try: - evdev_module = importlib.import_module("evdev") - except Exception as error: - raise RuntimeError("evdev is not available") from error - - input_device_ctor = cast(_InputDeviceCtor, getattr(evdev_module, "InputDevice")) - ecodes = cast(_Ecodes, getattr(evdev_module, "ecodes")) - list_devices = cast(_ListDevicesFn, getattr(evdev_module, "list_devices")) - - side_codes = { - "x1": ecodes.BTN_SIDE, - "x2": ecodes.BTN_EXTRA, - } - front_code = side_codes[self._front_button] - rear_code = side_codes[self._rear_button] - trigger_code: int | None = None - if self._gestures_enabled and self._gesture_trigger_button == "right": - trigger_code = ecodes.BTN_RIGHT - - devices: list[_EvdevDevice] = [] - for path in list_devices(): - try: - dev = input_device_ctor(path) - except Exception: - continue - try: - caps = dev.capabilities() - key_cap = caps.get(ecodes.EV_KEY, []) - required_codes = {front_code, rear_code} - if trigger_code is not None: - required_codes.add(trigger_code) - if not any(code in key_cap for code in required_codes): - dev.close() - continue - - btn_mouse = getattr(ecodes, "BTN_MOUSE", None) - has_pointer_button = ecodes.BTN_LEFT in key_cap or ( - isinstance(btn_mouse, int) and btn_mouse in key_cap - ) - if not has_pointer_button: - dev.close() - continue - - devices.append(dev) - except Exception: - dev.close() - - if not devices: - raise RuntimeError("No input device with side-button capability found") - - try: - fd_map: dict[int, _EvdevDevice] 
= {dev.fd: dev for dev in devices} - while not self._stop.is_set(): - ready, _, _ = select.select(list(fd_map.keys()), [], [], 0.2) - for fd in ready: - dev = fd_map[fd] - for event in dev.read(): - if event.type == ecodes.EV_KEY: - button_label: str | None = None - if event.code == front_code: - button_label = "front" - elif event.code == rear_code: - button_label = "rear" - elif ( - trigger_code is not None and event.code == trigger_code - ): - button_label = "right" - - if button_label is None: - continue - - if ( - self._gestures_enabled - and self._is_gesture_trigger_button(button_label) - ): - if event.value == 1: - self._start_gesture_capture(source_device=dev) - elif event.value == 0: - self._finish_gesture_capture(button_label) - continue - - if event.value == 1: - self._dispatch_click(button_label) - continue - - if ( - self._gestures_enabled - and event.type == ecodes.EV_REL - and self._gesture_active - ): - if event.code == ecodes.REL_X: - self._accumulate_gesture_delta(dx=event.value, dy=0) - elif event.code == ecodes.REL_Y: - self._accumulate_gesture_delta(dx=0, dy=event.value) - finally: - self._release_gesture_grab() - for dev in devices: - dev.close() - - def _run_pynput(self) -> None: - try: - mouse_module = importlib.import_module("pynput.mouse") - except Exception as error: - raise RuntimeError("pynput.mouse is not available") from error - - listener_ctor = cast(_MouseListenerCtor, getattr(mouse_module, "Listener")) - - button_map = { - "x1": {"x1", "x_button1", "button8"}, - "x2": {"x2", "x_button2", "button9"}, - } - - front_candidates = button_map[self._front_button] - rear_candidates = button_map[self._rear_button] - right_candidates = {"right", "button2"} - - def on_click(x: int, y: int, button: object, pressed: bool) -> None: - btn_name = str(button).lower().split(".")[-1] - button_label: str | None = None - if btn_name in front_candidates: - button_label = "front" - elif btn_name in rear_candidates: - button_label = "rear" - elif btn_name 
in right_candidates: - button_label = "right" - - if button_label is None: - return - - if self._gestures_enabled and self._is_gesture_trigger_button(button_label): - if pressed: - self._start_gesture_capture(initial_position=(x, y)) - else: - self._finish_gesture_capture(button_label) - return - - if pressed: - self._dispatch_click(button_label) - - def on_move(x: int, y: int) -> None: - if not self._gestures_enabled: - return - self._accumulate_gesture_position(x, y) - - listener = listener_ctor(on_click=on_click, on_move=on_move) - listener.start() - try: - while not self._stop.is_set(): - time.sleep(0.2) - finally: - listener.stop() - - def _dispatch_click(self, button_label: str) -> None: - if button_label == "front": - self._dispatch_front_press() - return - if button_label == "rear": - self._dispatch_rear_press() - return - - def _is_gesture_trigger_button(self, button_label: str) -> bool: - return button_label == self._gesture_trigger_button - - def _start_gesture_capture( - self, - *, - initial_position: tuple[int, int] | None = None, - source_device: _EvdevDevice | None = None, - ) -> None: - should_grab = False - with self._gesture_lock: - self._gesture_active = True - self._gesture_dx = 0 - self._gesture_dy = 0 - self._gesture_last_position = initial_position - if self._gesture_restore_cursor: - self._gesture_anchor_cursor = self._read_cursor_position() - else: - self._gesture_anchor_cursor = None - should_grab = self._gesture_freeze_pointer and source_device is not None - - if should_grab and source_device is not None: - self._try_grab_device(source_device) - - def _accumulate_gesture_delta(self, *, dx: int, dy: int) -> None: - with self._gesture_lock: - if not self._gesture_active: - return - self._gesture_dx += dx - self._gesture_dy += dy - - def _accumulate_gesture_position(self, x: int, y: int) -> None: - with self._gesture_lock: - if not self._gesture_active: - return - if self._gesture_last_position is None: - self._gesture_last_position = (x, y) 
- return - last_x, last_y = self._gesture_last_position - self._gesture_dx += x - last_x - self._gesture_dy += y - last_y - self._gesture_last_position = (x, y) - - def _finish_gesture_capture(self, button_label: str) -> None: - with self._gesture_lock: - if not self._gesture_active: - return - dx = self._gesture_dx - dy = self._gesture_dy - self._gesture_active = False - self._gesture_dx = 0 - self._gesture_dy = 0 - self._gesture_last_position = None - anchor_cursor = self._gesture_anchor_cursor - self._gesture_anchor_cursor = None - - self._release_gesture_grab() - - direction = self._classify_gesture(dx, dy, self._gesture_threshold_px) - if direction is None: - self._dispatch_click(button_label) - return - self._dispatch_gesture(direction) - if anchor_cursor is not None: - self._restore_cursor_position(anchor_cursor) - - def _dispatch_gesture(self, direction: str) -> None: - callback = self._on_gesture - if callback is None: - return - callback(direction) - - def _try_grab_device(self, device: _EvdevDevice) -> None: - try: - device.grab() - except Exception: - return - - with self._gesture_lock: - self._gesture_grabbed_device = device - - def _release_gesture_grab(self) -> None: - with self._gesture_lock: - grabbed = self._gesture_grabbed_device - self._gesture_grabbed_device = None - - if grabbed is None: - return - - try: - grabbed.ungrab() - except Exception: - return - - def _read_cursor_position(self) -> tuple[int, int] | None: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - return system_integration.cursor_position() - except Exception: - return None - - if not self._hyprland_session: - return None - try: - proc = subprocess.run( - ["hyprctl", "-j", "cursorpos"], - capture_output=True, - text=True, - check=False, - timeout=0.8, - ) - except (OSError, subprocess.TimeoutExpired): - return None - - if proc.returncode != 0: - return None - - try: - 
payload = cast(dict[str, object], json.loads(proc.stdout)) - except json.JSONDecodeError: - return None - - x_raw = payload.get("x") - y_raw = payload.get("y") - if not isinstance(x_raw, int | float) or not isinstance(y_raw, int | float): - return None - return int(x_raw), int(y_raw) - - def _restore_cursor_position(self, position: tuple[int, int]) -> None: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - x, y = position - try: - _ = system_integration.move_cursor(x=x, y=y) - except Exception: - return - return - - if not self._hyprland_session: - return - - x, y = position - try: - _ = subprocess.run( - ["hyprctl", "dispatch", "movecursor", str(x), str(y)], - capture_output=True, - text=True, - check=False, - timeout=0.8, - ) - except (OSError, subprocess.TimeoutExpired): - return - - @staticmethod - def _classify_gesture(dx: int, dy: int, threshold_px: int) -> str | None: - if max(abs(dx), abs(dy)) < threshold_px: - return None - if abs(dx) >= abs(dy): - return "right" if dx > 0 else "left" - return "down" if dy > 0 else "up" - - def _dispatch_front_press(self) -> None: - if self._should_fire_front(): - self._on_front_press() - - def _dispatch_rear_press(self) -> None: - if self._should_fire_rear(): - self._on_rear_press() - - def _should_fire_front(self) -> bool: - now = time.monotonic() - with self._debounce_lock: - if now - self._last_front_press_monotonic < self._debounce_s: - return False - self._last_front_press_monotonic = now - return True - - def _should_fire_rear(self) -> bool: - now = time.monotonic() - with self._debounce_lock: - if now - self._last_rear_press_monotonic < self._debounce_s: - return False - self._last_rear_press_monotonic = now - return True - - -class _EvdevEvent(Protocol): - type: int - value: int - code: int - - -class _EvdevDevice(Protocol): - fd: int - - def read(self) -> list[_EvdevEvent]: ... 
- - def capabilities(self) -> dict[int, list[int]]: ... - - def grab(self) -> None: ... - - def ungrab(self) -> None: ... - - def close(self) -> None: ... - - -class _InputDeviceCtor(Protocol): - def __call__(self, path: str) -> _EvdevDevice: ... - - -class _ListDevicesFn(Protocol): - def __call__(self) -> list[str]: ... - - -class _Ecodes(Protocol): - BTN_SIDE: int - BTN_EXTRA: int - BTN_LEFT: int - BTN_RIGHT: int - EV_KEY: int - EV_REL: int - REL_X: int - REL_Y: int - - -class _MouseListener(Protocol): - def start(self) -> None: ... - - def stop(self) -> None: ... - - -class _MouseListenerCtor(Protocol): - def __call__( - self, - *, - on_click: Callable[[int, int, object, bool], None], - on_move: Callable[[int, int], None] | None = None, - ) -> _MouseListener: ... diff --git a/vibemouse/output.py b/vibemouse/output.py deleted file mode 100644 index 6e28457..0000000 --- a/vibemouse/output.py +++ /dev/null @@ -1,355 +0,0 @@ -from __future__ import annotations - -import importlib -import json -import shlex -import subprocess -import time -from dataclasses import dataclass -from typing import Protocol, cast - -import pyperclip - -from vibemouse.system_integration import ( - SystemIntegration, - create_system_integration, - is_terminal_window_payload, - load_atspi_module, - probe_text_input_focus_via_atspi, - probe_send_enter_via_atspi, -) - - -class TextOutput: - def __init__( - self, - *, - system_integration: SystemIntegration | None = None, - openclaw_command: str = "openclaw", - openclaw_agent: str | None = None, - openclaw_timeout_s: float = 20.0, - openclaw_retries: int = 0, - ) -> None: - try: - keyboard_module = importlib.import_module("pynput.keyboard") - except Exception as error: - raise RuntimeError( - f"Failed to load keyboard control dependencies: {error}" - ) from error - - controller_ctor = cast( - _ControllerCtor, - getattr(cast(object, keyboard_module), "Controller"), - ) - key_holder = cast( - _KeyHolder, - getattr(cast(object, keyboard_module), 
"Key"), - ) - self._kb: _KeyboardController = controller_ctor() - self._enter_key: object = key_holder.enter - self._ctrl_key: object = key_holder.ctrl - self._shift_key: object = key_holder.shift - self._atspi: object | None = load_atspi_module() - self._system_integration: SystemIntegration = ( - system_integration - if system_integration is not None - else create_system_integration() - ) - self._hyprland_session: bool = self._system_integration.is_hyprland - self._openclaw_command: str = openclaw_command - self._openclaw_agent: str | None = openclaw_agent - self._openclaw_timeout_s: float = max(0.5, openclaw_timeout_s) - self._openclaw_retries: int = max(0, int(openclaw_retries)) - - def send_enter(self, *, mode: str = "enter") -> None: - normalized = mode.strip().lower() - if normalized == "none": - return - if normalized == "enter": - if self._send_hyprland_shortcut(mod="", key="Return"): - return - if self._send_enter_via_atspi(): - return - self._tap_key(self._enter_key) - return - if normalized == "ctrl_enter": - self._tap_modified_key(self._ctrl_key, self._enter_key) - return - if normalized == "shift_enter": - self._tap_modified_key(self._shift_key, self._enter_key) - return - raise ValueError(f"Unsupported enter mode: {mode!r}") - - def inject_or_clipboard(self, text: str, *, auto_paste: bool = False) -> str: - normalized = text.strip() - if not normalized: - return "empty" - - if self._is_text_input_focused(): - self._kb.type(normalized) - return "typed" - - pyperclip.copy(normalized) - if auto_paste: - try: - self._paste_clipboard() - return "pasted" - except Exception: - return "clipboard" - return "clipboard" - - def send_to_openclaw(self, text: str) -> str: - return self.send_to_openclaw_result(text).route - - def send_to_openclaw_result(self, text: str) -> "OpenClawDispatchResult": - normalized = text.strip() - if not normalized: - return OpenClawDispatchResult(route="empty", reason="empty_text") - - command = 
self._build_openclaw_command(normalized) - if command is None: - pyperclip.copy(normalized) - return OpenClawDispatchResult(route="clipboard", reason="invalid_command") - - attempts = max(1, int(getattr(self, "_openclaw_retries", 0)) + 1) - last_reason = "spawn_error" - for attempt in range(attempts): - try: - _ = subprocess.Popen( - command, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - start_new_session=True, - ) - if attempt == 0: - return OpenClawDispatchResult( - route="openclaw", - reason="dispatched", - ) - return OpenClawDispatchResult( - route="openclaw", - reason=f"dispatched_after_retry_{attempt}", - ) - except OSError as error: - last_reason = f"spawn_error:{error.__class__.__name__}" - - pyperclip.copy(normalized) - return OpenClawDispatchResult(route="clipboard", reason=last_reason) - - def _build_openclaw_command(self, message: str) -> list[str] | None: - raw_command = str(getattr(self, "_openclaw_command", "openclaw")).strip() - if not raw_command: - return None - - try: - parts = shlex.split(raw_command) - except ValueError: - return None - - if not parts: - return None - - command = [*parts, "agent", "--message", message, "--json"] - agent = getattr(self, "_openclaw_agent", None) - if isinstance(agent, str): - normalized_agent = agent.strip() - if normalized_agent: - command.extend(["--agent", normalized_agent]) - return command - - def _paste_clipboard(self) -> None: - terminal_active = self._is_hyprland_terminal_active() - for mod, key in self._paste_shortcuts(terminal_active=terminal_active): - if self._send_platform_shortcut(mod=mod, key=key): - return - - self._kb.press(self._ctrl_key) - self._kb.press("v") - self._kb.release("v") - self._kb.release(self._ctrl_key) - - def _tap_key(self, key: object) -> None: - self._kb.press(key) - time.sleep(0.012) - self._kb.release(key) - - def _tap_modified_key(self, modifier: object, key: object) -> None: - self._kb.press(modifier) - self._kb.press(key) - 
time.sleep(0.012) - self._kb.release(key) - self._kb.release(modifier) - - def _send_enter_via_atspi(self) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - handled = system_integration.send_enter_via_accessibility() - except Exception: - handled = None - if handled is True: - return True - - atspi_module = getattr(self, "_atspi", None) - return probe_send_enter_via_atspi( - atspi_module=atspi_module, - lazy_load=False, - ) - - def _paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - shortcuts = system_integration.paste_shortcuts( - terminal_active=terminal_active - ) - except Exception: - shortcuts = () - if shortcuts: - return shortcuts - - if terminal_active: - return ( - ("CTRL SHIFT", "V"), - ("SHIFT", "Insert"), - ("CTRL", "V"), - ) - return (("CTRL", "V"),) - - def _send_platform_shortcut(self, *, mod: str, key: str) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - if bool(system_integration.send_shortcut(mod=mod, key=key)): - return True - if not self._hyprland_session: - return False - except Exception: - if not self._hyprland_session: - return False - - if not self._hyprland_session: - return False - - mod_part = mod.strip().upper() - if mod_part: - arg = f"{mod_part}, {key}, activewindow" - else: - arg = f", {key}, activewindow" - - try: - proc = subprocess.run( - ["hyprctl", "dispatch", "sendshortcut", arg], - capture_output=True, - text=True, - check=False, - timeout=1.0, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "ok" - - def _send_hyprland_shortcut(self, *, mod: 
str, key: str) -> bool: - return self._send_platform_shortcut(mod=mod, key=key) - - def _is_terminal_window_active(self) -> bool: - payload_map: dict[str, object] | None = None - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - terminal_active = system_integration.is_terminal_window_active() - except Exception: - terminal_active = None - if isinstance(terminal_active, bool): - return terminal_active - - if not self._hyprland_session: - return False - - if payload_map is None: - try: - proc = subprocess.run( - ["hyprctl", "-j", "activewindow"], - capture_output=True, - text=True, - check=False, - timeout=1.0, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - if proc.returncode != 0: - return False - - try: - payload_obj = cast(object, json.loads(proc.stdout)) - except json.JSONDecodeError: - return False - - if not isinstance(payload_obj, dict): - return False - - payload_map = cast(dict[str, object], payload_obj) - - return is_terminal_window_payload(payload_map) - - def _is_hyprland_terminal_active(self) -> bool: - return self._is_terminal_window_active() - - def _is_text_input_focused(self) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - focused = system_integration.is_text_input_focused() - except Exception: - focused = None - if isinstance(focused, bool): - return focused - - return probe_text_input_focus_via_atspi() - - -class _KeyboardController(Protocol): - def press(self, key: object) -> None: ... - - def release(self, key: object) -> None: ... - - def type(self, text: str) -> None: ... - - -class _ControllerCtor(Protocol): - def __call__(self) -> _KeyboardController: ... 
- - -class _KeyHolder(Protocol): - enter: object - ctrl: object - shift: object - - -@dataclass(frozen=True) -class OpenClawDispatchResult: - route: str - reason: str diff --git a/vibemouse/system_integration.py b/vibemouse/system_integration.py deleted file mode 100644 index d52203c..0000000 --- a/vibemouse/system_integration.py +++ /dev/null @@ -1,341 +0,0 @@ -from __future__ import annotations - -import importlib -import json -import os -import subprocess -import sys -from collections.abc import Mapping -from typing import Protocol, cast - - -_TERMINAL_CLASS_HINTS: set[str] = { - "foot", - "kitty", - "alacritty", - "wezterm", - "ghostty", - "gnome-terminal", - "gnome-terminal-server", - "konsole", - "tilix", - "xterm", - "terminator", - "xfce4-terminal", - "urxvt", - "st", - "tabby", - "hyper", - "warp", - "windowsterminal", - "wt", -} - -_TERMINAL_TITLE_HINTS: set[str] = { - "terminal", - "tmux", - "bash", - "zsh", - "fish", - "powershell", - "cmd.exe", -} - - -def is_terminal_window_payload(payload: Mapping[str, object]) -> bool: - window_class = str(payload.get("class", "")).lower() - initial_class = str(payload.get("initialClass", "")).lower() - title = str(payload.get("title", "")).lower() - - if any( - hint in window_class or hint in initial_class for hint in _TERMINAL_CLASS_HINTS - ): - return True - - return any(hint in title for hint in _TERMINAL_TITLE_HINTS) - - -class SystemIntegration(Protocol): - @property - def is_hyprland(self) -> bool: ... - - def send_shortcut(self, *, mod: str, key: str) -> bool: ... - - def active_window(self) -> dict[str, object] | None: ... - - def cursor_position(self) -> tuple[int, int] | None: ... - - def move_cursor(self, *, x: int, y: int) -> bool: ... - - def switch_workspace(self, direction: str) -> bool: ... - - def is_text_input_focused(self) -> bool | None: ... - - def send_enter_via_accessibility(self) -> bool | None: ... - - def is_terminal_window_active(self) -> bool | None: ... 
- - def paste_shortcuts( - self, *, terminal_active: bool - ) -> tuple[tuple[str, str], ...]: ... - - -class NoopSystemIntegration: - @property - def is_hyprland(self) -> bool: - return False - - def send_shortcut(self, *, mod: str, key: str) -> bool: - del mod - del key - return False - - def active_window(self) -> dict[str, object] | None: - return None - - def cursor_position(self) -> tuple[int, int] | None: - return None - - def move_cursor(self, *, x: int, y: int) -> bool: - del x - del y - return False - - def switch_workspace(self, direction: str) -> bool: - del direction - return False - - def is_text_input_focused(self) -> bool | None: - return None - - def send_enter_via_accessibility(self) -> bool | None: - return None - - def is_terminal_window_active(self) -> bool | None: - return None - - def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - del terminal_active - return () - - -class WindowsSystemIntegration(NoopSystemIntegration): - def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - if terminal_active: - return ( - ("CTRL SHIFT", "V"), - ("SHIFT", "Insert"), - ("CTRL", "V"), - ) - return (("CTRL", "V"),) - - -class MacOSSystemIntegration(NoopSystemIntegration): - def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - if terminal_active: - return (("CMD", "V"),) - return (("CMD", "V"),) - - -class HyprlandSystemIntegration: - @property - def is_hyprland(self) -> bool: - return True - - def send_shortcut(self, *, mod: str, key: str) -> bool: - mod_part = mod.strip().upper() - if mod_part: - arg = f"{mod_part}, {key}, activewindow" - else: - arg = f", {key}, activewindow" - return self._dispatch(["sendshortcut", arg], timeout=1.0) - - def active_window(self) -> dict[str, object] | None: - return self._query_json(["activewindow"], timeout=1.0) - - def cursor_position(self) -> tuple[int, int] | None: - payload = self._query_json(["cursorpos"], timeout=0.8) - 
if payload is None: - return None - - x_raw = payload.get("x") - y_raw = payload.get("y") - if not isinstance(x_raw, int | float) or not isinstance(y_raw, int | float): - return None - - return int(x_raw), int(y_raw) - - def move_cursor(self, *, x: int, y: int) -> bool: - return self._dispatch(["movecursor", str(x), str(y)], timeout=0.8) - - def switch_workspace(self, direction: str) -> bool: - workspace_arg = "e-1" if direction == "left" else "e+1" - return self._dispatch(["workspace", workspace_arg], timeout=1.0) - - def is_text_input_focused(self) -> bool | None: - return probe_text_input_focus_via_atspi() - - def send_enter_via_accessibility(self) -> bool | None: - return probe_send_enter_via_atspi() - - def is_terminal_window_active(self) -> bool | None: - payload = self.active_window() - if payload is None: - return False - return is_terminal_window_payload(payload) - - def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - if terminal_active: - return ( - ("CTRL SHIFT", "V"), - ("SHIFT", "Insert"), - ("CTRL", "V"), - ) - return (("CTRL", "V"),) - - @staticmethod - def _dispatch(args: list[str], *, timeout: float) -> bool: - try: - proc = subprocess.run( - ["hyprctl", "dispatch", *args], - capture_output=True, - text=True, - check=False, - timeout=timeout, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "ok" - - @staticmethod - def _query_json(args: list[str], *, timeout: float) -> dict[str, object] | None: - try: - proc = subprocess.run( - ["hyprctl", "-j", *args], - capture_output=True, - text=True, - check=False, - timeout=timeout, - ) - except (OSError, subprocess.TimeoutExpired): - return None - - if proc.returncode != 0: - return None - - try: - payload_obj = cast(object, json.loads(proc.stdout)) - except json.JSONDecodeError: - return None - - if not isinstance(payload_obj, dict): - return None - - return cast(dict[str, object], payload_obj) - - 
-def detect_hyprland_session(*, env: Mapping[str, str] | None = None) -> bool: - source = env if env is not None else os.environ - desktop = source.get("XDG_CURRENT_DESKTOP", "") - if "hyprland" in desktop.lower(): - return True - return bool(source.get("HYPRLAND_INSTANCE_SIGNATURE")) - - -def create_system_integration( - *, - env: Mapping[str, str] | None = None, - platform_name: str | None = None, -) -> SystemIntegration: - if detect_hyprland_session(env=env): - return HyprlandSystemIntegration() - - normalized_platform = ( - (platform_name if platform_name is not None else sys.platform).strip().lower() - ) - if normalized_platform.startswith("win"): - return WindowsSystemIntegration() - if normalized_platform == "darwin": - return MacOSSystemIntegration() - - return NoopSystemIntegration() - - -def probe_text_input_focus_via_atspi(*, timeout_s: float = 1.5) -> bool: - script = ( - "import gi\n" - "gi.require_version('Atspi', '2.0')\n" - "from gi.repository import Atspi\n" - "obj = Atspi.get_desktop(0).get_focus()\n" - "editable = False\n" - "role = ''\n" - "if obj is not None:\n" - " role = obj.get_role_name().lower()\n" - " attrs = obj.get_attributes() or []\n" - " for it in attrs:\n" - " s = str(it).lower()\n" - " if s == 'editable:true' or s.endswith(':editable:true'):\n" - " editable = True\n" - " break\n" - "roles = {'text', 'entry', 'password text', 'terminal', 'paragraph', 'document text', 'document web'}\n" - "print('1' if editable or role in roles else '0')\n" - ) - - try: - proc = subprocess.run( - ["python3", "-c", script], - capture_output=True, - text=True, - check=False, - timeout=timeout_s, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "1" - - -def load_atspi_module() -> object | None: - try: - gi = importlib.import_module("gi") - require_version = cast(_RequireVersionFn, getattr(gi, "require_version")) - require_version("Atspi", "2.0") - atspi_repo = cast(object, 
importlib.import_module("gi.repository")) - return cast(object, getattr(atspi_repo, "Atspi")) - except Exception: - return None - - -def probe_send_enter_via_atspi( - *, atspi_module: object | None = None, lazy_load: bool = True -) -> bool: - module = atspi_module - if module is None and lazy_load: - module = load_atspi_module() - if module is None: - return False - - try: - key_synth = cast(object, getattr(module, "KeySynthType")) - press_release = cast(object, getattr(key_synth, "PRESSRELEASE")) - generate_keyboard_event = cast( - _GenerateKeyboardEventFn, - getattr(module, "generate_keyboard_event"), - ) - return bool(generate_keyboard_event(65293, None, press_release)) - except Exception: - return False - - -class _GenerateKeyboardEventFn(Protocol): - def __call__( - self, - keyval: int, - keystring: str | None, - synth_type: object, - ) -> bool: ... - - -class _RequireVersionFn(Protocol): - def __call__(self, namespace: str, version: str) -> None: ... diff --git a/vibemouse/transcriber.py b/vibemouse/transcriber.py deleted file mode 100644 index 4375989..0000000 --- a/vibemouse/transcriber.py +++ /dev/null @@ -1,435 +0,0 @@ -from __future__ import annotations - -import importlib -from pathlib import Path -from threading import Lock -from typing import Protocol, cast - -from vibemouse.config import AppConfig - - -class SenseVoiceTranscriber: - def __init__(self, config: AppConfig) -> None: - self._config: AppConfig = config - self._transcriber: _TranscriberProtocol | None = None - self._transcriber_lock: Lock = Lock() - self.device_in_use: str = config.device - self.backend_in_use: str = "unknown" - - def transcribe(self, audio_path: Path) -> str: - self._ensure_transcriber_loaded() - if self._transcriber is None: - raise RuntimeError("SenseVoice transcriber is not initialized") - return self._transcriber.transcribe(audio_path) - - def prewarm(self) -> None: - self._ensure_transcriber_loaded() - - def _ensure_transcriber_loaded(self) -> None: - if 
self._transcriber is not None: - return - - with self._transcriber_lock: - if self._transcriber is not None: - return - - backend = self._config.transcriber_backend - if backend == "auto": - self._build_auto_backend() - return - - if backend == "funasr": - self._build_funasr_backend() - return - - if backend == "funasr_onnx": - self._build_funasr_onnx_backend() - return - - raise RuntimeError( - f"Unsupported backend {backend!r}. Use auto, funasr, or funasr_onnx." - ) - - def _build_auto_backend(self) -> None: - errors: list[str] = [] - - if self._looks_like_intel_npu_device(self._config.device): - try: - self._build_funasr_onnx_backend() - return - except Exception as error: - errors.append(f"funasr_onnx: {error}") - try: - self._build_funasr_backend() - return - except Exception as fallback_error: - errors.append(f"funasr: {fallback_error}") - - else: - try: - self._build_funasr_backend() - return - except Exception as error: - errors.append(f"funasr: {error}") - try: - self._build_funasr_onnx_backend() - return - except Exception as fallback_error: - errors.append(f"funasr_onnx: {fallback_error}") - - raise RuntimeError( - "Failed to initialize any transcriber backend. 
" + " | ".join(errors) - ) - - def _build_funasr_backend(self) -> None: - backend = _FunASRBackend(self._config) - self._transcriber = backend - self.device_in_use = backend.device_in_use - self.backend_in_use = "funasr" - - def _build_funasr_onnx_backend(self) -> None: - backend = _FunASRONNXBackend(self._config) - self._transcriber = backend - self.device_in_use = backend.device_in_use - self.backend_in_use = "funasr_onnx" - - @staticmethod - def _looks_like_intel_npu_device(device: str) -> bool: - normalized = device.strip().lower() - return normalized.startswith("npu") or normalized.startswith("openvino:npu") - - -class _FunASRBackend: - def __init__(self, config: AppConfig) -> None: - self._config: AppConfig = config - self._model: _SenseModel | None = None - self._postprocess: _PostprocessFn | None = None - self._load_lock: Lock = Lock() - self.device_in_use: str = config.device - self._ensure_model_loaded() - - def transcribe(self, audio_path: Path) -> str: - if self._model is None: - raise RuntimeError("FunASR model is not initialized") - - result = self._model.generate( - input=str(audio_path), - cache={}, - language=self._config.language, - use_itn=self._config.use_itn, - merge_vad=self._config.merge_vad, - merge_length_s=self._config.merge_length_s, - batch_size_s=60, - ) - if not result: - return "" - - text_obj = result[0].get("text", "") - if not isinstance(text_obj, str): - return "" - - text = text_obj.strip() - if self._postprocess is None: - return text - return self._postprocess(text).strip() - - def _ensure_model_loaded(self) -> None: - if self._model is not None: - return - with self._load_lock: - if self._model is not None: - return - try: - model, postprocess = self._create_model(self._config.device) - self._model = model - self._postprocess = postprocess - self.device_in_use = self._config.device - return - except Exception as primary_error: - if ( - not self._config.fallback_to_cpu - or self._config.device.strip().lower() == "cpu" - ): - 
raise RuntimeError( - f"Failed to load FunASR SenseVoice on {self._config.device}: {primary_error}" - ) from primary_error - - try: - model, postprocess = self._create_model("cpu") - except Exception as cpu_error: - raise RuntimeError( - f"Failed to load FunASR SenseVoice on {self._config.device} and cpu fallback: {cpu_error}" - ) from cpu_error - - self._model = model - self._postprocess = postprocess - self.device_in_use = "cpu" - - def _create_model(self, device: str) -> tuple[_SenseModel, _PostprocessFn]: - try: - funasr_module = importlib.import_module("funasr") - postprocess_module = importlib.import_module( - "funasr.utils.postprocess_utils" - ) - except Exception as error: - raise RuntimeError( - "FunASR is not installed or not importable in current environment" - ) from error - - auto_model_ctor = cast(_AutoModelCtor, getattr(funasr_module, "AutoModel")) - rich_transcription_postprocess = cast( - _PostprocessFn, - getattr(postprocess_module, "rich_transcription_postprocess"), - ) - - kwargs: dict[str, object] = { - "model": self._config.model_name, - "trust_remote_code": self._config.trust_remote_code, - "device": device, - "disable_update": True, - } - if self._config.enable_vad: - kwargs["vad_model"] = "fsmn-vad" - kwargs["vad_kwargs"] = { - "max_single_segment_time": self._config.vad_max_single_segment_ms - } - - model = auto_model_ctor(**kwargs) - return model, rich_transcription_postprocess - - -class _FunASRONNXBackend: - def __init__(self, config: AppConfig) -> None: - self._config: AppConfig = config - self._model: _ONNXSenseVoiceModel | None = None - self._postprocess: _PostprocessFn | None = None - self._load_lock: Lock = Lock() - self.device_in_use: str = "cpu" - self._ensure_model_loaded() - - def transcribe(self, audio_path: Path) -> str: - if self._model is None: - raise RuntimeError("funasr_onnx SenseVoice model is not initialized") - if self._postprocess is None: - raise RuntimeError("funasr postprocess function is not initialized") - - 
textnorm = "withitn" if self._config.use_itn else "woitn" - result = self._model( - str(audio_path), - language=self._config.language, - textnorm=textnorm, - ) - if not result: - return "" - - raw_text = result[0] - return self._postprocess(raw_text).strip() - - def _ensure_model_loaded(self) -> None: - if self._model is not None: - return - - with self._load_lock: - if self._model is not None: - return - try: - SenseVoiceSmall = self._load_onnx_class() - postprocess = self._load_postprocess() - except Exception as error: - raise RuntimeError( - "funasr_onnx backend requires funasr-onnx and funasr packages" - ) from error - - requested_path = self._resolve_onnx_model_dir() - self._ensure_tokenizer_file(requested_path) - device_id = self._resolve_onnx_device_id(self._config.device) - - try: - model = SenseVoiceSmall( - model_dir=str(requested_path), - batch_size=1, - device_id=device_id, - quantize=True, - cache_dir=None, - ) - self._model = model - self._postprocess = postprocess - self.device_in_use = self._resolve_device_label(self._config.device) - return - except Exception as primary_error: - if not self._config.fallback_to_cpu: - raise RuntimeError( - f"Failed to load funasr_onnx backend on {self._config.device}: {primary_error}" - ) from primary_error - - try: - model = SenseVoiceSmall( - model_dir=str(requested_path), - batch_size=1, - device_id="-1", - quantize=True, - cache_dir=None, - ) - except Exception as cpu_error: - raise RuntimeError( - f"Failed to load funasr_onnx backend on {self._config.device} and cpu fallback: {cpu_error}" - ) from cpu_error - - self._model = model - self._postprocess = postprocess - self.device_in_use = "cpu" - - def _resolve_onnx_model_dir(self) -> Path: - raw_model = self._config.model_name - canonical_model = raw_model - if raw_model == "iic/SenseVoiceSmall": - canonical_model = "iic/SenseVoiceSmall-onnx" - - if canonical_model.startswith("iic/"): - return self._download_modelscope_snapshot(canonical_model) - - 
path_candidate = Path(canonical_model) - if not path_candidate.exists(): - return path_candidate - - if self._contains_onnx_model(path_candidate): - return path_candidate - - raise RuntimeError( - f"ONNX model directory {path_candidate} exists but model_quant.onnx/model.onnx is missing" - ) - - @staticmethod - def _contains_onnx_model(model_dir: Path) -> bool: - return (model_dir / "model_quant.onnx").exists() or ( - model_dir / "model.onnx" - ).exists() - - @staticmethod - def _download_modelscope_snapshot(model_id: str) -> Path: - try: - snapshot_mod = importlib.import_module("modelscope.hub.snapshot_download") - except Exception as error: - raise RuntimeError( - "modelscope is required to download ONNX model snapshots" - ) from error - - snapshot_download = cast( - _SnapshotDownloadFn, - getattr(snapshot_mod, "snapshot_download"), - ) - snapshot_path = snapshot_download(model_id) - model_dir = Path(snapshot_path) - if not model_dir.exists(): - raise RuntimeError(f"Downloaded model path does not exist: {snapshot_path}") - if not _FunASRONNXBackend._contains_onnx_model(model_dir): - raise RuntimeError( - f"Downloaded model {model_id} missing model_quant.onnx/model.onnx" - ) - return model_dir - - @staticmethod - def _resolve_onnx_device_id(device: str) -> str: - normalized = device.strip().lower() - if normalized == "cpu": - return "-1" - if normalized.startswith("cuda"): - parts = normalized.split(":", 1) - return parts[1] if len(parts) > 1 and parts[1] else "0" - return "-1" - - @staticmethod - def _resolve_device_label(device: str) -> str: - normalized = device.strip().lower() - if normalized.startswith("cuda"): - return normalized - return "cpu" - - def _ensure_tokenizer_file(self, model_dir: Path) -> None: - target = model_dir / "chn_jpn_yue_eng_ko_spectok.bpe.model" - if target.exists(): - return - - fallback = ( - Path.home() - / ".cache/modelscope/hub/models/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model" - ) - if fallback.exists(): - 
model_dir.mkdir(parents=True, exist_ok=True) - _ = target.write_bytes(fallback.read_bytes()) - return - - raise RuntimeError( - "Tokenizer file chn_jpn_yue_eng_ko_spectok.bpe.model is missing and no fallback was found" - ) - - @staticmethod - def _load_onnx_class() -> _ONNXSenseVoiceCtor: - module = importlib.import_module("funasr_onnx") - return cast(_ONNXSenseVoiceCtor, getattr(module, "SenseVoiceSmall")) - - @staticmethod - def _load_postprocess() -> _PostprocessFn: - post_module = importlib.import_module("funasr.utils.postprocess_utils") - return cast( - _PostprocessFn, - getattr(post_module, "rich_transcription_postprocess"), - ) - - -class _TranscriberProtocol(Protocol): - device_in_use: str - - def transcribe(self, audio_path: Path) -> str: ... - - -class _SenseResultItem(Protocol): - def get(self, key: str, default: str = "") -> str | object: ... - - -class _SenseModel(Protocol): - def generate( - self, - *, - input: str, - cache: dict[str, object], - language: str, - use_itn: bool, - merge_vad: bool, - merge_length_s: int, - batch_size_s: int, - ) -> list[_SenseResultItem]: ... - - -class _AutoModelCtor(Protocol): - def __call__(self, **kwargs: object) -> _SenseModel: ... - - -class _PostprocessFn(Protocol): - def __call__(self, text: str) -> str: ... - - -class _ONNXSenseVoiceModel(Protocol): - def __call__( - self, - wav_content: str, - *, - language: str, - textnorm: str, - ) -> list[str]: ... - - -class _ONNXSenseVoiceCtor(Protocol): - def __call__( - self, - *, - model_dir: str, - batch_size: int, - device_id: str, - quantize: bool, - cache_dir: str | None, - ) -> _ONNXSenseVoiceModel: ... - - -class _SnapshotDownloadFn(Protocol): - def __call__(self, model_id: str) -> str: ...