From 2586304510d5620727db58bc931494a5f90a1ceb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 22 Apr 2026 08:43:38 +0000
Subject: [PATCH 1/8] feat: add GitHub Copilot orchestrator as alternative LLM
 backend

Agent-Logs-Url: https://github.com/Delgerskhn/video-use/sessions/d0dd1259-9f0d-4c67-a4d1-f368300227c1

Co-authored-by: Delgerskhn <57222574+Delgerskhn@users.noreply.github.com>
---
 .env.example    |   5 +
 README.md       |  44 +++
 orchestrator.py | 742 ++++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml  |   1 +
 4 files changed, 792 insertions(+)
 create mode 100644 orchestrator.py
diff --git a/.env.example b/.env.example
index 4c49a94..f300000 100644
--- a/.env.example
+++ b/.env.example
@@ -1 +1,6 @@
 ELEVENLABS_API_KEY=
+
+# Required for the GitHub Copilot orchestrator (orchestrator.py).
+# Create a Personal Access Token at https://github.com/settings/tokens
+# with the 'copilot' scope, then paste it here or export it in your shell.
+GITHUB_TOKEN=
diff --git a/README.md b/README.md
index 59cc112..adf9c37 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,12 @@ Drop raw footage in a folder, chat with Claude Code, get `final.mp4` back. Works
 
 ## Get started
 
+Two ways to run video-use — pick the one that matches your subscription:
+
+### Option A — Claude Code (original)
+
+Requires an Anthropic API subscription or Claude Pro.
+
 ```bash
 # 1. Clone and symlink into Claude Code's skills directory
 git clone https://github.com/browser-use/video-use
@@ -43,6 +49,44 @@ cd /path/to/your/videos
 claude
 ```
 
+### Option B — GitHub Copilot (no Anthropic key required)
+
+Uses your existing GitHub Copilot subscription as the LLM backend via the
+OpenAI-compatible Copilot API. Same pipeline, same production rules, same helpers.
+
+```bash
+# 1. Clone the repo
+git clone https://github.com/browser-use/video-use
+cd video-use
+
+# 2. Install deps (includes the openai SDK)
+pip install -e ".[copilot]"
+brew install ffmpeg           # required
+brew install yt-dlp            # optional
+
+# 3. Configure API keys
+cp .env.example .env
+$EDITOR .env
+#   ELEVENLABS_API_KEY=...    ← for transcription (same as before)
+#   GITHUB_TOKEN=...          ← PAT with 'copilot' scope
+#                               https://github.com/settings/tokens
+```
+
+Then run the orchestrator against your video folder:
+
+```bash
+python /path/to/video-use/orchestrator.py /path/to/your/videos
+```
+
+Available options:
+
+```
+--model gpt-4o          # default; also: gpt-4o-mini, claude-3.5-sonnet, o3-mini
+--endpoint <url>        # default: https://api.githubcopilot.com
+                        # GitHub Models alternative: https://models.inference.ai.azure.com
+--max-turns 100         # safety cap on LLM turns (default: 100)
+```
+
 And in the session:
 
 > edit these into a launch video
diff --git a/orchestrator.py b/orchestrator.py
new file mode 100644
index 0000000..964fd2d
--- /dev/null
+++ b/orchestrator.py
@@ -0,0 +1,742 @@
+#!/usr/bin/env python3
+"""GitHub Copilot-backed video editing orchestrator for video-use.
+
+Replaces the `claude` CLI runtime with a standalone Python script that drives
+the same video editing pipeline using the GitHub Copilot API (OpenAI-compatible).
+All 12 hard production rules from SKILL.md are enforced via the same system
+prompt — no logic changes to the skill or helpers are needed.
+
+Requirements:
+  pip install -e ".[copilot]"          # openai>=1.0
+  export GITHUB_TOKEN=<your PAT>       # PAT with `copilot` scope
+  ELEVENLABS_API_KEY=... in .env       # for transcription (same as before)
+  ffmpeg and ffprobe on PATH
+
+Usage:
+  python orchestrator.py /path/to/videos
+  python orchestrator.py /path/to/videos --model gpt-4o
+  python orchestrator.py /path/to/videos --endpoint https://models.inference.ai.azure.com
+"""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Repo-relative paths
+# ---------------------------------------------------------------------------
+
+REPO_ROOT = Path(__file__).resolve().parent
+HELPERS_DIR = REPO_ROOT / "helpers"
+SKILL_MD = REPO_ROOT / "SKILL.md"
+
+# ---------------------------------------------------------------------------
+# System prompt
+# ---------------------------------------------------------------------------
+
+
+def load_skill_prompt() -> str:
+    """Return SKILL.md as UTF-8 text with any leading ``---`` YAML front matter removed."""
+    text = SKILL_MD.read_text(encoding="utf-8")  # don't depend on the locale's default codec
+    if text.startswith("---"):
+        end = text.find("\n---", 3)  # the closing fence has to begin a line
+        if end != -1:
+            text = text[end + 4 :].lstrip("\n")
+    return text
+
+
+# Maximum characters returned from a single tool call before truncation.
+# Keeps large files (packed transcripts, long ffmpeg logs) from consuming
+# the entire context window.
+MAX_TOOL_RESULT_LENGTH = 20_000
+
+# ---------------------------------------------------------------------------
+# Tool schemas (OpenAI function-calling format)
+# ---------------------------------------------------------------------------
+
+TOOLS: list[dict] = [
+    {
+        "type": "function",
+        "function": {
+            "name": "transcribe",
+            "description": (
+                "Transcribe a single video with ElevenLabs Scribe. "
+                "Writes word-level transcript JSON to edit/transcripts/<stem>.json. "
+                "Cached — skips upload if the JSON already exists."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "video_path": {
+                        "type": "string",
+                        "description": "Absolute path to the video file.",
+                    },
+                    "edit_dir": {
+                        "type": "string",
+                        "description": "Edit output directory. Defaults to <video_parent>/edit.",
+                    },
+                    "language": {
+                        "type": "string",
+                        "description": "ISO language code (e.g. 'en'). Omit to auto-detect.",
+                    },
+                    "num_speakers": {
+                        "type": "integer",
+                        "description": "Number of speakers. Improves diarization when known.",
+                    },
+                },
+                "required": ["video_path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "transcribe_batch",
+            "description": (
+                "Batch-transcribe every video in a directory using parallel workers. "
+                "Cached per source — already-transcribed files are skipped."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "videos_dir": {
+                        "type": "string",
+                        "description": "Directory containing source videos.",
+                    },
+                    "workers": {
+                        "type": "integer",
+                        "description": "Parallel workers (default 4).",
+                    },
+                    "edit_dir": {
+                        "type": "string",
+                        "description": "Override edit output directory.",
+                    },
+                    "num_speakers": {
+                        "type": "integer",
+                        "description": "Number of speakers (optional).",
+                    },
+                },
+                "required": ["videos_dir"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "pack_transcripts",
+            "description": (
+                "Pack all per-source transcript JSONs in edit/transcripts/ into "
+                "takes_packed.md — the primary phrase-level reading surface for cut decisions."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "edit_dir": {
+                        "type": "string",
+                        "description": "Edit output directory containing transcripts/ subdirectory.",
+                    },
+                    "silence_threshold": {
+                        "type": "number",
+                        "description": "Silence gap in seconds that triggers a phrase break (default 0.5).",
+                    },
+                },
+                "required": ["edit_dir"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "timeline_view",
+            "description": (
+                "Generate a filmstrip + waveform PNG for a time range of a video. "
+                "Use at decision points (ambiguous pauses, retake comparison, cut-point "
+                "sanity checks). NOT a scan tool — call only when you need a visual check."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "video_path": {
+                        "type": "string",
+                        "description": "Absolute path to the video file.",
+                    },
+                    "start": {
+                        "type": "number",
+                        "description": "Start time in seconds.",
+                    },
+                    "end": {
+                        "type": "number",
+                        "description": "End time in seconds.",
+                    },
+                    "n_frames": {
+                        "type": "integer",
+                        "description": "Number of filmstrip frames to extract (default 8).",
+                    },
+                    "transcript_path": {
+                        "type": "string",
+                        "description": "Optional path to a transcript JSON for word label overlay.",
+                    },
+                },
+                "required": ["video_path", "start", "end"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "render",
+            "description": (
+                "Render a video from an EDL (edit decision list JSON). "
+                "Runs the full pipeline: per-segment extract with grade + 30ms audio fades → "
+                "lossless concat → overlays (PTS-shifted) → subtitles LAST → loudnorm."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "edl_path": {
+                        "type": "string",
+                        "description": "Absolute path to edl.json.",
+                    },
+                    "output_path": {
+                        "type": "string",
+                        "description": "Output video path (e.g. edit/final.mp4).",
+                    },
+                    "preview": {
+                        "type": "boolean",
+                        "description": "Preview mode: 1080p, CRF 22, faster encode.",
+                    },
+                    "build_subtitles": {
+                        "type": "boolean",
+                        "description": "Build master.srt from transcripts + EDL timeline offsets.",
+                    },
+                    "no_subtitles": {
+                        "type": "boolean",
+                        "description": "Skip subtitles even if the EDL references one.",
+                    },
+                    "no_loudnorm": {
+                        "type": "boolean",
+                        "description": "Skip audio loudness normalization (default: on, -14 LUFS).",
+                    },
+                },
+                "required": ["edl_path", "output_path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "grade",
+            "description": (
+                "Apply a color grade to a video via ffmpeg filter chain. "
+                "Presets: subtle, neutral_punch, warm_cinematic, none. "
+                "Omit both preset and filter for auto mode (data-driven per-clip correction)."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "input_path": {
+                        "type": "string",
+                        "description": "Input video path.",
+                    },
+                    "output_path": {
+                        "type": "string",
+                        "description": "Output video path.",
+                    },
+                    "preset": {
+                        "type": "string",
+                        "description": "Grade preset name.",
+                        "enum": ["subtle", "neutral_punch", "warm_cinematic", "none"],
+                    },
+                    "filter": {
+                        "type": "string",
+                        "description": "Raw ffmpeg filter string (overrides preset).",
+                    },
+                },
+                "required": ["input_path", "output_path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "bash",
+            "description": (
+                "Run a shell command. Use for ffprobe, yt-dlp, file listing, "
+                "ffmpeg one-offs, and other system tasks the other tools don't cover."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "command": {
+                        "type": "string",
+                        "description": "Shell command to execute.",
+                    },
+                },
+                "required": ["command"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "read_file",
+            "description": "Read the contents of a text file (takes_packed.md, project.md, edl.json, transcripts, etc.).",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Absolute path to the file.",
+                    },
+                },
+                "required": ["path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "write_file",
+            "description": "Write or append content to a file (edl.json, project.md, etc.).",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Absolute path to the file.",
+                    },
+                    "content": {
+                        "type": "string",
+                        "description": "Content to write.",
+                    },
+                    "append": {
+                        "type": "boolean",
+                        "description": "If true, append to existing file instead of overwriting.",
+                    },
+                },
+                "required": ["path", "content"],
+            },
+        },
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Tool dispatch
+# ---------------------------------------------------------------------------
+
+
+def _run_helper(args: list[str]) -> tuple[int, str, str]:
+    """Run ``sys.executable`` with *args*, capturing text output; return (returncode, stdout, stderr)."""
+    cmd = [sys.executable] + args
+    proc = subprocess.run(cmd, capture_output=True, text=True)  # no timeout: waits until the helper exits
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _format_result(returncode: int, stdout: str, stderr: str) -> str:
+    """Format a finished process as text; failures lead with one ``[exit code N]`` line."""
+    out, err = stdout.strip(), stderr.strip()
+    parts = [out] if out else []
+    if returncode != 0 and err:  # stderr is reported only when the process failed
+        parts.append(f"[stderr]\n{err}")
+    if not parts:
+        parts.append("(no output)")  # exit status, if any, is added once below
+    if returncode != 0:
+        parts.insert(0, f"[exit code {returncode}]")
+    return "\n".join(parts)
+
+
+def dispatch_tool(
+    name: str,
+    args: dict,
+    videos_dir: Path,
+    edit_dir: Path,
+) -> tuple[str, Path | None]:
+    """Execute a tool call. Returns (result_text, optional_image_path)."""
+
+    if name == "transcribe":
+        video_path = args["video_path"]
+        cmd = [str(HELPERS_DIR / "transcribe.py"), video_path]
+        if args.get("edit_dir"):
+            cmd += ["--edit-dir", args["edit_dir"]]
+        else:
+            cmd += ["--edit-dir", str(edit_dir)]
+        if args.get("language"):
+            cmd += ["--language", args["language"]]
+        if args.get("num_speakers"):
+            cmd += ["--num-speakers", str(args["num_speakers"])]
+        rc, out, err = _run_helper(cmd)
+        return _format_result(rc, out, err), None
+
+    if name == "transcribe_batch":
+        cmd = [str(HELPERS_DIR / "transcribe_batch.py"), args["videos_dir"]]
+        if args.get("edit_dir"):
+            cmd += ["--edit-dir", args["edit_dir"]]
+        if args.get("workers"):
+            cmd += ["--workers", str(args["workers"])]
+        if args.get("num_speakers"):
+            cmd += ["--num-speakers", str(args["num_speakers"])]
+        rc, out, err = _run_helper(cmd)
+        return _format_result(rc, out, err), None
+
+    if name == "pack_transcripts":
+        cmd = [str(HELPERS_DIR / "pack_transcripts.py"), "--edit-dir", args["edit_dir"]]
+        if args.get("silence_threshold") is not None:
+            cmd += ["--silence-threshold", str(args["silence_threshold"])]
+        rc, out, err = _run_helper(cmd)
+        return _format_result(rc, out, err), None
+
+    if name == "timeline_view":
+        video_path = Path(args["video_path"])
+        start = args["start"]
+        end = args["end"]
+        verify_dir = edit_dir / "verify"
+        verify_dir.mkdir(parents=True, exist_ok=True)
+        out_img = verify_dir / f"timeline_{video_path.stem}_{start:.2f}_{end:.2f}.png"
+        cmd = [
+            str(HELPERS_DIR / "timeline_view.py"),
+            str(video_path),
+            str(start),
+            str(end),
+            "-o", str(out_img),
+        ]
+        if args.get("n_frames"):
+            cmd += ["--n-frames", str(args["n_frames"])]
+        if args.get("transcript_path"):
+            cmd += ["--transcript", args["transcript_path"]]
+        rc, out, err = _run_helper(cmd)
+        result = _format_result(rc, out, err)
+        if rc == 0 and out_img.exists():
+            result += f"\nImage saved to: {out_img}"
+            return result, out_img
+        return result, None
+
+    if name == "render":
+        cmd = [
+            str(HELPERS_DIR / "render.py"),
+            args["edl_path"],
+            "-o", args["output_path"],
+        ]
+        if args.get("preview"):
+            cmd.append("--preview")
+        if args.get("build_subtitles"):
+            cmd.append("--build-subtitles")
+        if args.get("no_subtitles"):
+            cmd.append("--no-subtitles")
+        if args.get("no_loudnorm"):
+            cmd.append("--no-loudnorm")
+        rc, out, err = _run_helper(cmd)
+        return _format_result(rc, out, err), None
+
+    if name == "grade":
+        cmd = [
+            str(HELPERS_DIR / "grade.py"),
+            args["input_path"],
+            "-o", args["output_path"],
+        ]
+        if args.get("filter"):
+            cmd += ["--filter", args["filter"]]
+        elif args.get("preset"):
+            cmd += ["--preset", args["preset"]]
+        rc, out, err = _run_helper(cmd)
+        return _format_result(rc, out, err), None
+
+    if name == "bash":
+        command = args["command"]
+        proc = subprocess.run(
+            command,
+            shell=True,
+            capture_output=True,
+            text=True,
+            timeout=300,
+        )
+        return _format_result(proc.returncode, proc.stdout, proc.stderr), None
+
+    if name == "read_file":
+        path = Path(args["path"])
+        if not path.exists():
+            return f"File not found: {path}", None
+        try:
+            return path.read_text(), None
+        except Exception as e:
+            return f"Error reading file: {e}", None
+
+    if name == "write_file":
+        path = Path(args["path"])
+        path.parent.mkdir(parents=True, exist_ok=True)
+        mode = "a" if args.get("append") else "w"
+        try:
+            with open(path, mode) as f:
+                f.write(args["content"])
+            return f"Written to {path}", None
+        except Exception as e:
+            return f"Error writing file: {e}", None
+
+    return f"Unknown tool: {name}", None
+
+
+# ---------------------------------------------------------------------------
+# Session loop
+# ---------------------------------------------------------------------------
+
+
+def _build_image_message(img_path: Path) -> dict:
+    b64 = base64.b64encode(img_path.read_bytes()).decode()  # entire file is read and base64-encoded
+    return {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": f"[Timeline view image: {img_path.name}]"},
+            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},  # always labelled image/png
+        ],
+    }
+
+
+def run_session(
+    videos_dir: Path,
+    model: str,
+    endpoint: str,
+    max_turns: int,
+) -> None:
+    try:
+        from openai import OpenAI
+    except ImportError:
+        sys.exit(
+            "openai package not found.\n"
+            "Install with:  pip install -e \".[copilot]\"\n"
+            "or:            pip install openai"
+        )
+
+    github_token = os.environ.get("GITHUB_TOKEN", "").strip()
+    if not github_token:
+        sys.exit(
+            "GITHUB_TOKEN is not set.\n"
+            "Export a Personal Access Token with the 'copilot' scope:\n"
+            "  export GITHUB_TOKEN=github_pat_..."
+        )
+
+    client = OpenAI(base_url=endpoint, api_key=github_token)
+
+    edit_dir = videos_dir / "edit"
+    edit_dir.mkdir(parents=True, exist_ok=True)
+
+    # Build system prompt with working-directory context injected at the end
+    system_prompt = load_skill_prompt()
+    system_prompt += (
+        f"\n\n## Session context\n\n"
+        f"- Videos directory: `{videos_dir}`\n"
+        f"- Edit directory: `{edit_dir}`\n"
+        f"- Helpers directory: `{HELPERS_DIR}`\n"
+        f"- All session outputs must go to `{edit_dir}/` (Hard Rule 12).\n"
+    )
+
+    messages: list[dict] = [{"role": "system", "content": system_prompt}]
+
+    # Seed with prior session memory if available
+    project_md = edit_dir / "project.md"
+    if project_md.exists():
+        prior = project_md.read_text().strip()
+        if prior:
+            messages.append({
+                "role": "user",
+                "content": (
+                    f"[Prior session memory — project.md]\n\n{prior}\n\n---\n"
+                    "I'm back. What should we pick up from or start fresh on?"
+                ),
+            })
+            messages.append({
+                "role": "assistant",
+                "content": (
+                    "I've reviewed the session notes above. Ready when you are — "
+                    "just tell me what you'd like to work on."
+                ),
+            })
+
+    print(f"\nvideo-use — GitHub Copilot orchestrator")
+    print(f"  model:    {model}")
+    print(f"  endpoint: {endpoint}")
+    print(f"  videos:   {videos_dir}")
+    print("Type your message. Enter 'exit' or press Ctrl+C to quit.\n")
+
+    # Prompt for the first user message
+    try:
+        first_input = input("You: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        print("\nBye.")
+        return
+
+    if not first_input or first_input.lower() in ("exit", "quit", "q"):
+        print("Bye.")
+        return
+
+    messages.append({"role": "user", "content": first_input})
+
+    turn = 0
+    while turn < max_turns:
+        try:
+            response = client.chat.completions.create(
+                model=model,
+                messages=messages,
+                tools=TOOLS,
+                max_tokens=4096,
+            )
+        except KeyboardInterrupt:
+            print("\n[Interrupted]")
+            break
+        except Exception as e:
+            print(f"\n[API error: {e}]")
+            break
+
+        choice = response.choices[0]
+        message = choice.message
+
+        # Serialize the assistant message back into the history
+        msg_dict: dict = {"role": "assistant", "content": message.content}
+        if message.tool_calls:
+            msg_dict["tool_calls"] = [
+                {
+                    "id": tc.id,
+                    "type": "function",
+                    "function": {
+                        "name": tc.function.name,
+                        "arguments": tc.function.arguments,
+                    },
+                }
+                for tc in message.tool_calls
+            ]
+        messages.append(msg_dict)
+
+        if message.tool_calls:
+            # Execute every requested tool call
+            image_paths: list[Path] = []
+
+            for tc in message.tool_calls:
+                tool_name = tc.function.name
+                try:
+                    tool_args = json.loads(tc.function.arguments)
+                except json.JSONDecodeError:
+                    tool_args = {}
+
+                # Pretty-print what we're doing
+                args_preview = ", ".join(
+                    f"{k}={v!r}" for k, v in list(tool_args.items())[:3]
+                )
+                print(f"  [tool] {tool_name}({args_preview})", flush=True)
+
+                result_text, image_path = dispatch_tool(
+                    tool_name, tool_args, videos_dir, edit_dir
+                )
+
+                # Truncate very long results so we don't blow the context window
+                if len(result_text) > MAX_TOOL_RESULT_LENGTH:
+                    result_text = result_text[:MAX_TOOL_RESULT_LENGTH] + "\n... [truncated]"
+
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tc.id,
+                    "content": result_text,
+                })
+
+                if image_path and image_path.exists():
+                    image_paths.append(image_path)
+
+            # Inject timeline view images as user messages so vision-capable
+            # models (gpt-4o, etc.) can reason about them
+            for img_path in image_paths:
+                messages.append(_build_image_message(img_path))
+
+            turn += 1
+            continue  # Let the model respond to the tool results
+
+        # No tool calls — conversational turn
+        if message.content:
+            print(f"\nAssistant: {message.content}\n")
+
+        if choice.finish_reason == "stop":
+            try:
+                user_input = input("You: ").strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\nBye.")
+                break
+
+            if not user_input or user_input.lower() in ("exit", "quit", "q"):
+                print("Bye.")
+                break
+
+            messages.append({"role": "user", "content": user_input})
+
+        turn += 1
+
+    if turn >= max_turns:
+        print(f"\n[Reached max_turns={max_turns}. Session ended.]")
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(
+        description="GitHub Copilot-backed video editing orchestrator for video-use.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=(
+            "Environment variables:\n"
+            "  GITHUB_TOKEN       Personal Access Token with 'copilot' scope (required)\n"
+            "  ELEVENLABS_API_KEY ElevenLabs API key for transcription (required for transcribe tools)\n"
+            "\nModel options (via GitHub Copilot):\n"
+            "  gpt-4o             Default — strong reasoning, vision support\n"
+            "  gpt-4o-mini        Faster and lighter\n"
+            "  claude-3.5-sonnet  Anthropic model via Copilot\n"
+            "  o3-mini            Reasoning model\n"
+            "\nAlternative endpoint (GitHub Models free tier):\n"
+            "  --endpoint https://models.inference.ai.azure.com\n"
+        ),
+    )
+    ap.add_argument(
+        "videos_dir",
+        type=Path,
+        help="Directory containing the source video files.",
+    )
+    ap.add_argument(
+        "--model",
+        default="gpt-4o",
+        help="Model identifier for the Copilot API (default: gpt-4o).",
+    )
+    ap.add_argument(
+        "--endpoint",
+        default="https://api.githubcopilot.com",
+        help=(
+            "GitHub Copilot API base URL "
+            "(default: https://api.githubcopilot.com). "
+            "Use https://models.inference.ai.azure.com for GitHub Models."
+        ),
+    )
+    ap.add_argument(
+        "--max-turns",
+        type=int,
+        default=100,
+        help="Maximum LLM turns before the session ends (default: 100).",
+    )
+    args = ap.parse_args()
+
+    videos_dir = args.videos_dir.resolve()
+    if not videos_dir.is_dir():
+        sys.exit(f"Not a directory: {videos_dir}")
+
+    run_session(
+        videos_dir=videos_dir,
+        model=args.model,
+        endpoint=args.endpoint,
+        max_turns=args.max_turns,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 1221f19..296cad6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
 
 [project.optional-dependencies]
 animations = ["manim"]
+copilot = ["openai>=1.0"]
 
 [build-system]
 requires = ["setuptools>=61.0"]

From 4f0d45a5e8e11d153f78e667dedb03292dc3a5ce Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 22 Apr 2026 08:58:58 +0000
Subject: [PATCH 2/8] feat: set claude-opus-4-7 as default model for GitHub
 Copilot orchestrator

Agent-Logs-Url: https://github.com/Delgerskhn/video-use/sessions/e28e4d80-3925-45dc-927c-dd0e31247735

Co-authored-by: Delgerskhn <57222574+Delgerskhn@users.noreply.github.com>
---
 README.md       | 11 +++++++----
 orchestrator.py | 13 +++++++------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index adf9c37..1179e80 100644
--- a/README.md
+++ b/README.md
@@ -81,10 +81,13 @@ python /path/to/video-use/orchestrator.py /path/to/your/videos
 Available options:
 
 ```
---model gpt-4o          # default; also: gpt-4o-mini, claude-3.5-sonnet, o3-mini
---endpoint <url>        # default: https://api.githubcopilot.com
-                        # GitHub Models alternative: https://models.inference.ai.azure.com
---max-turns 100         # safety cap on LLM turns (default: 100)
+--model claude-opus-4-7  # default — Anthropic Claude Opus 4.7 (strong reasoning + vision)
+--model gpt-4o           # OpenAI GPT-4o alternative
+--model claude-sonnet-4-5 # faster Anthropic option
+--model gpt-4o-mini      # fastest/lightest option
+--endpoint <url>         # default: https://api.githubcopilot.com
+                         # GitHub Models alternative: https://models.inference.ai.azure.com
+--max-turns 100          # safety cap on LLM turns (default: 100)
 ```
 
 And in the session:
diff --git a/orchestrator.py b/orchestrator.py
index 964fd2d..57fecc7 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -691,10 +691,11 @@ def main() -> None:
             "  GITHUB_TOKEN       Personal Access Token with 'copilot' scope (required)\n"
             "  ELEVENLABS_API_KEY ElevenLabs API key for transcription (required for transcribe tools)\n"
             "\nModel options (via GitHub Copilot):\n"
-            "  gpt-4o             Default — strong reasoning, vision support\n"
-            "  gpt-4o-mini        Faster and lighter\n"
-            "  claude-3.5-sonnet  Anthropic model via Copilot\n"
-            "  o3-mini            Reasoning model\n"
+            "  claude-opus-4-7    Default — Anthropic Claude Opus 4.7, strong reasoning + vision\n"
+            "  claude-sonnet-4-5  Anthropic Claude Sonnet 4.5 — faster, lighter\n"
+            "  gpt-4o             OpenAI GPT-4o — strong reasoning, vision support\n"
+            "  gpt-4o-mini        OpenAI GPT-4o mini — fastest OpenAI option\n"
+            "  o3-mini            OpenAI o3-mini — reasoning model\n"
             "\nAlternative endpoint (GitHub Models free tier):\n"
             "  --endpoint https://models.inference.ai.azure.com\n"
         ),
@@ -706,8 +707,8 @@ def main() -> None:
     )
     ap.add_argument(
         "--model",
-        default="gpt-4o",
-        help="Model identifier for the Copilot API (default: gpt-4o).",
+        default="claude-opus-4-7",
+        help="Model identifier for the Copilot API (default: claude-opus-4-7).",
     )
     ap.add_argument(
         "--endpoint",

From 6ddc103b61302f7c72ffe9ea167efaf2abf07751 Mon Sep 17 00:00:00 2001
From: delgerskhn <delger.bayanmunkh6@gmail.com>
Date: Wed, 22 Apr 2026 17:52:26 +0800
Subject: [PATCH 3/8] fix: address PR review comments

- Load .env automatically for GITHUB_TOKEN (matches transcribe.py pattern)
- Disable bash tool by default; add --enable-bash flag for explicit opt-in
- Return tool error on JSON parse failure instead of silently using {}
- Default pack_transcripts edit_dir to session edit_dir when not provided
- Sandbox read_file/write_file to videos_dir/edit_dir (path traversal fix)
- Print message.content before executing tool calls so user sees it
- Lock transcribe_batch to session videos_dir, ignore model-provided path
- Downscale timeline images via ffmpeg before base64 embedding (max 1.5 MB)
- Update .env.example to note that orchestrator loads .env automatically

---
 .env.example    |   3 +-
 orchestrator.py | 112 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/.env.example b/.env.example
index f300000..f7de35e 100644
--- a/.env.example
+++ b/.env.example
@@ -2,5 +2,6 @@ ELEVENLABS_API_KEY=
 
 # Required for the GitHub Copilot orchestrator (orchestrator.py).
 # Create a Personal Access Token at https://github.com/settings/tokens
-# with the 'copilot' scope, then paste it here or export it in your shell.
+# with the 'copilot' scope, then paste it here. The orchestrator loads
+# this file automatically (like the transcription helpers do).
 GITHUB_TOKEN=
diff --git a/orchestrator.py b/orchestrator.py
index 57fecc7..627929b 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -37,6 +37,23 @@
 HELPERS_DIR = REPO_ROOT / "helpers"
 SKILL_MD = REPO_ROOT / "SKILL.md"
 
+
+def _load_env_file() -> None:
+    """Load key=value pairs from .env into os.environ (does not overwrite existing vars)."""
+    for candidate in [REPO_ROOT / ".env", Path(".env")]:
+        if candidate.exists():
+            for line in candidate.read_text().splitlines():
+                line = line.strip()
+                if not line or line.startswith("#") or "=" not in line:
+                    continue
+                k, v = line.split("=", 1)
+                k = k.strip()
+                v = v.strip().strip('"').strip("'")
+                if k and k not in os.environ:
+                    os.environ[k] = v
+            break
+
+
 # ---------------------------------------------------------------------------
 # System prompt
 # ---------------------------------------------------------------------------
@@ -353,11 +370,21 @@ def _format_result(returncode: int, stdout: str, stderr: str) -> str:
     return "\n".join(parts)
 
 
+def _is_under(path: Path, parent: Path) -> bool:
+    """Return True if *path* is the same as or nested under *parent*."""
+    try:
+        path.relative_to(parent.resolve())
+        return True
+    except ValueError:
+        return False
+
+
 def dispatch_tool(
     name: str,
     args: dict,
     videos_dir: Path,
     edit_dir: Path,
+    enable_bash: bool = False,
 ) -> tuple[str, Path | None]:
     """Execute a tool call. Returns (result_text, optional_image_path)."""
 
@@ -376,7 +403,8 @@ def dispatch_tool(
         return _format_result(rc, out, err), None
 
     if name == "transcribe_batch":
-        cmd = [str(HELPERS_DIR / "transcribe_batch.py"), args["videos_dir"]]
+        # Always use the session videos_dir to prevent operating outside it
+        cmd = [str(HELPERS_DIR / "transcribe_batch.py"), str(videos_dir)]
         if args.get("edit_dir"):
             cmd += ["--edit-dir", args["edit_dir"]]
         if args.get("workers"):
@@ -387,7 +415,7 @@ def dispatch_tool(
         return _format_result(rc, out, err), None
 
     if name == "pack_transcripts":
-        cmd = [str(HELPERS_DIR / "pack_transcripts.py"), "--edit-dir", args["edit_dir"]]
+        cmd = [str(HELPERS_DIR / "pack_transcripts.py"), "--edit-dir", args.get("edit_dir") or str(edit_dir)]
         if args.get("silence_threshold") is not None:
             cmd += ["--silence-threshold", str(args["silence_threshold"])]
         rc, out, err = _run_helper(cmd)
@@ -449,6 +477,12 @@ def dispatch_tool(
         return _format_result(rc, out, err), None
 
     if name == "bash":
+        if not enable_bash:
+            return (
+                "[bash tool is disabled by default. Restart the orchestrator with "
+                "--enable-bash to allow shell commands.]",
+                None,
+            )
         command = args["command"]
         proc = subprocess.run(
             command,
@@ -460,7 +494,9 @@ def dispatch_tool(
         return _format_result(proc.returncode, proc.stdout, proc.stderr), None
 
     if name == "read_file":
-        path = Path(args["path"])
+        path = Path(args["path"]).resolve()
+        if not (_is_under(path, videos_dir) or _is_under(path, edit_dir)):
+            return f"Access denied: path must be under {videos_dir} or {edit_dir}", None
         if not path.exists():
             return f"File not found: {path}", None
         try:
@@ -469,7 +505,9 @@ def dispatch_tool(
             return f"Error reading file: {e}", None
 
     if name == "write_file":
-        path = Path(args["path"])
+        path = Path(args["path"]).resolve()
+        if not _is_under(path, edit_dir):
+            return f"Access denied: write path must be under {edit_dir}", None
         path.parent.mkdir(parents=True, exist_ok=True)
         mode = "a" if args.get("append") else "w"
         try:
@@ -487,13 +525,47 @@ def dispatch_tool(
 # ---------------------------------------------------------------------------
 
 
+# Maximum image size (bytes) to embed in chat; larger images are downscaled first.
+MAX_IMAGE_BYTES = 1_500_000  # 1.5 MB
+
+
 def _build_image_message(img_path: Path) -> dict:
-    b64 = base64.b64encode(img_path.read_bytes()).decode()
+    """Embed a timeline image as a base64 data URL, downscaling via ffmpeg if needed."""
+    raw = img_path.read_bytes()
+    mime = "image/png"
+    if len(raw) > MAX_IMAGE_BYTES:
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
+            tmp_path = Path(tmp.name)
+        try:
+            subprocess.run(
+                [
+                    "ffmpeg", "-y", "-i", str(img_path),
+                    "-vf", "scale='min(960,iw)':-2",
+                    str(tmp_path),
+                ],
+                capture_output=True,
+                check=False,
+            )
+            raw = tmp_path.read_bytes()
+            mime = "image/jpeg"
+        except Exception:
+            pass
+        finally:
+            tmp_path.unlink(missing_ok=True)
+    if len(raw) > MAX_IMAGE_BYTES:
+        return {
+            "role": "user",
+            "content": (
+                f"[Timeline image too large to embed ({len(raw):,} bytes); "
+                f"saved to: {img_path}]"
+            ),
+        }
+    b64 = base64.b64encode(raw).decode()
     return {
         "role": "user",
         "content": [
             {"type": "text", "text": f"[Timeline view image: {img_path.name}]"},
-            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
+            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
         ],
     }
 
@@ -503,6 +575,7 @@ def run_session(
     model: str,
     endpoint: str,
     max_turns: int,
+    enable_bash: bool = False,
 ) -> None:
     try:
         from openai import OpenAI
@@ -513,6 +586,7 @@ def run_session(
             "or:            pip install openai"
         )
 
+    _load_env_file()
     github_token = os.environ.get("GITHUB_TOKEN", "").strip()
     if not github_token:
         sys.exit(
@@ -613,6 +687,10 @@ def run_session(
         messages.append(msg_dict)
 
         if message.tool_calls:
+            # Show any explanation the model included alongside the tool calls
+            if message.content:
+                print(f"\nAssistant: {message.content}\n")
+
             # Execute every requested tool call
             image_paths: list[Path] = []
 
@@ -621,7 +699,15 @@ def run_session(
                 try:
                     tool_args = json.loads(tc.function.arguments)
                 except json.JSONDecodeError:
-                    tool_args = {}
+                    messages.append({
+                        "role": "tool",
+                        "tool_call_id": tc.id,
+                        "content": (
+                            f"[Invalid JSON in tool arguments — could not parse. "
+                            f"Raw arguments: {tc.function.arguments!r}. Please retry with valid JSON.]"
+                        ),
+                    })
+                    continue
 
                 # Pretty-print what we're doing
                 args_preview = ", ".join(
@@ -630,7 +716,7 @@ def run_session(
                 print(f"  [tool] {tool_name}({args_preview})", flush=True)
 
                 result_text, image_path = dispatch_tool(
-                    tool_name, tool_args, videos_dir, edit_dir
+                    tool_name, tool_args, videos_dir, edit_dir, enable_bash=enable_bash
                 )
 
                 # Truncate very long results so we don't blow the context window
@@ -725,6 +811,15 @@ def main() -> None:
         default=100,
         help="Maximum LLM turns before the session ends (default: 100).",
     )
+    ap.add_argument(
+        "--enable-bash",
+        action="store_true",
+        default=False,
+        help=(
+            "Enable the bash tool (disabled by default). "
+            "Only enable when you trust the model and understand the security implications."
+        ),
+    )
     args = ap.parse_args()
 
     videos_dir = args.videos_dir.resolve()
@@ -736,6 +831,7 @@ def main() -> None:
         model=args.model,
         endpoint=args.endpoint,
         max_turns=args.max_turns,
+        enable_bash=args.enable_bash,
     )
 
 

From adf06574f74a8e26e4c5811547ac8d70c80853f2 Mon Sep 17 00:00:00 2001
From: delgerskhn <delger.bayanmunkh6@gmail.com>
Date: Wed, 22 Apr 2026 19:25:40 +0800
Subject: [PATCH 4/8] feat: migrate orchestrator to GitHub Copilot SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the manual OpenAI REST API orchestration loop with the official
GitHub Copilot SDK (github-copilot-sdk). Key changes:

orchestrator.py:
- Drop openai dependency; use CopilotClient + define_tool from copilot SDK
- SDK spawns the Copilot CLI subprocess automatically — no separate install
- Tools defined with @define_tool + Pydantic models instead of raw JSON schemas
- SDK manages session history, context compaction (infinite sessions) natively
- Authentication via copilot auth login OR GITHUB_TOKEN — SDK handles both
- Shell tool disabled by default via on_permission_request handler (--enable-shell
  flag to opt in), replacing the previous --enable-bash flag
- File writes sandboxed to edit_dir via the same permission handler
- pack_transcripts and transcribe_batch always use session edit_dir/videos_dir
- Timeline images attached via SDK blob attachment API instead of manual base64
  embedding in message history
- Async rewrite: asyncio.run() entry point, async tool handlers
- Remove --endpoint flag (no longer needed — SDK manages the CLI subprocess)
- Remove manual JSON parse error handling (SDK handles protocol errors)
- Increase default max_turns from 100 to 200

pyproject.toml:
- copilot extras: openai>=1.0 → github-copilot-sdk + pydantic>=2.0

.env.example:
- Document both auth options: copilot auth login vs GITHUB_TOKEN

README.md:
- Update Option B setup steps for Copilot SDK
- Add copilot auth login as recommended auth method
- Update model list to current Copilot CLI models
- Remove --endpoint option, add --enable-shell, /model mid-session note
---
 .env.example    |   10 +-
 README.md       |   30 +-
 orchestrator.py | 1054 +++++++++++++++++++----------------------------
 pyproject.toml  |    2 +-
 4 files changed, 443 insertions(+), 653 deletions(-)

diff --git a/.env.example b/.env.example
index f7de35e..9fa52cf 100644
--- a/.env.example
+++ b/.env.example
@@ -1,7 +1,11 @@
 ELEVENLABS_API_KEY=
 
 # Required for the GitHub Copilot orchestrator (orchestrator.py).
-# Create a Personal Access Token at https://github.com/settings/tokens
-# with the 'copilot' scope, then paste it here. The orchestrator loads
-# this file automatically (like the transcription helpers do).
+#
+# Option A — browser login (recommended, no token needed):
+#   copilot auth login
+#
+# Option B — Personal Access Token:
+#   Create one at https://github.com/settings/tokens with the 'copilot' scope,
+#   then paste it below. The orchestrator loads this file automatically.
 GITHUB_TOKEN=
diff --git a/README.md b/README.md
index 1179e80..e89fbe5 100644
--- a/README.md
+++ b/README.md
@@ -52,23 +52,27 @@ claude
 ### Option B — GitHub Copilot (no Anthropic key required)
 
 Uses your existing GitHub Copilot subscription as the LLM backend via the
-OpenAI-compatible Copilot API. Same pipeline, same production rules, same helpers.
+[GitHub Copilot SDK](https://github.com/github/copilot-sdk). The SDK bundles the
+Copilot CLI automatically — no separate CLI install needed. Same pipeline, same
+production rules, same helpers.
 
 ```bash
 # 1. Clone the repo
 git clone https://github.com/browser-use/video-use
 cd video-use
 
-# 2. Install deps (includes the openai SDK)
+# 2. Install deps (includes the Copilot SDK)
 pip install -e ".[copilot]"
 brew install ffmpeg           # required
 brew install yt-dlp            # optional
 
-# 3. Configure API keys
+# 3. Authenticate — pick one:
+copilot auth login             # Option A: browser login (recommended, no token needed)
+#  — OR —
 cp .env.example .env
 $EDITOR .env
 #   ELEVENLABS_API_KEY=...    ← for transcription (same as before)
-#   GITHUB_TOKEN=...          ← PAT with 'copilot' scope
+#   GITHUB_TOKEN=...          ← PAT with 'copilot' scope (option B)
 #                               https://github.com/settings/tokens
 ```
 
@@ -81,15 +85,19 @@ python /path/to/video-use/orchestrator.py /path/to/your/videos
 Available options:
 
 ```
---model claude-opus-4-7  # default — Anthropic Claude Opus 4.7 (strong reasoning + vision)
---model gpt-4o           # OpenAI GPT-4o alternative
---model claude-sonnet-4-5 # faster Anthropic option
---model gpt-4o-mini      # fastest/lightest option
---endpoint <url>         # default: https://api.githubcopilot.com
-                         # GitHub Models alternative: https://models.inference.ai.azure.com
---max-turns 100          # safety cap on LLM turns (default: 100)
+# Model (omit to let Copilot auto-select — recommended)
+--model claude-opus-4.5   # Anthropic Claude Opus 4.5 — complex tasks, deep reasoning
+--model claude-sonnet-4.5 # Anthropic Claude Sonnet 4.5 — faster, most routine tasks
+--model gpt-5             # OpenAI GPT-5
+--model gpt-4.1           # OpenAI GPT-4.1
+
+# Other flags
+--enable-shell            # enable built-in shell tool (off by default for safety)
+--max-turns 200           # safety cap on interactive turns (default: 200)
 ```
 
+You can also switch models mid-session with `/model` at the prompt.
+
 And in the session:
 
 > edit these into a launch video
diff --git a/orchestrator.py b/orchestrator.py
index 627929b..64b1a89 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -2,32 +2,36 @@
 """GitHub Copilot-backed video editing orchestrator for video-use.
 
 Replaces the `claude` CLI runtime with a standalone Python script that drives
-the same video editing pipeline using the GitHub Copilot API (OpenAI-compatible).
-All 12 hard production rules from SKILL.md are enforced via the same system
-prompt — no logic changes to the skill or helpers are needed.
+the same video editing pipeline using the GitHub Copilot SDK.  The SDK spawns
+the Copilot CLI as a subprocess automatically — no separate CLI install needed.
+
+All 12 hard production rules from SKILL.md are enforced via the system prompt.
+No logic changes to the skill or helpers are required.
 
 Requirements:
-  pip install -e ".[copilot]"          # openai>=1.0
-  export GITHUB_TOKEN=<your PAT>       # PAT with `copilot` scope
-  ELEVENLABS_API_KEY=... in .env       # for transcription (same as before)
+  pip install -e ".[copilot]"          # github-copilot-sdk + pydantic
+  GITHUB_TOKEN=... in .env             # PAT with 'copilot' scope
+    OR  run `copilot auth login` once  # sign in via browser (no token needed)
+  ELEVENLABS_API_KEY=... in .env       # for transcription
   ffmpeg and ffprobe on PATH
 
 Usage:
   python orchestrator.py /path/to/videos
-  python orchestrator.py /path/to/videos --model gpt-4o
-  python orchestrator.py /path/to/videos --endpoint https://models.inference.ai.azure.com
+  python orchestrator.py /path/to/videos --model claude-sonnet-4.5
+  python orchestrator.py /path/to/videos --enable-shell
 """
 
 from __future__ import annotations
 
 import argparse
+import asyncio
 import base64
-import json
 import os
 import subprocess
 import sys
 import tempfile
 from pathlib import Path
+from typing import Optional
 
 # ---------------------------------------------------------------------------
 # Repo-relative paths
@@ -70,283 +74,28 @@ def load_skill_prompt() -> str:
 
 
 # Maximum characters returned from a single tool call before truncation.
-# Keeps large files (packed transcripts, long ffmpeg logs) from consuming
-# the entire context window.
 MAX_TOOL_RESULT_LENGTH = 20_000
 
+# Maximum image size (bytes) to embed; larger images are downscaled first.
+MAX_IMAGE_BYTES = 1_500_000  # 1.5 MB
+
+
 # ---------------------------------------------------------------------------
-# Tool schemas (OpenAI function-calling format)
+# Path sandbox helper
 # ---------------------------------------------------------------------------
 
-TOOLS: list[dict] = [
-    {
-        "type": "function",
-        "function": {
-            "name": "transcribe",
-            "description": (
-                "Transcribe a single video with ElevenLabs Scribe. "
-                "Writes word-level transcript JSON to edit/transcripts/<stem>.json. "
-                "Cached — skips upload if the JSON already exists."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "video_path": {
-                        "type": "string",
-                        "description": "Absolute path to the video file.",
-                    },
-                    "edit_dir": {
-                        "type": "string",
-                        "description": "Edit output directory. Defaults to <video_parent>/edit.",
-                    },
-                    "language": {
-                        "type": "string",
-                        "description": "ISO language code (e.g. 'en'). Omit to auto-detect.",
-                    },
-                    "num_speakers": {
-                        "type": "integer",
-                        "description": "Number of speakers. Improves diarization when known.",
-                    },
-                },
-                "required": ["video_path"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "transcribe_batch",
-            "description": (
-                "Batch-transcribe every video in a directory using parallel workers. "
-                "Cached per source — already-transcribed files are skipped."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "videos_dir": {
-                        "type": "string",
-                        "description": "Directory containing source videos.",
-                    },
-                    "workers": {
-                        "type": "integer",
-                        "description": "Parallel workers (default 4).",
-                    },
-                    "edit_dir": {
-                        "type": "string",
-                        "description": "Override edit output directory.",
-                    },
-                    "num_speakers": {
-                        "type": "integer",
-                        "description": "Number of speakers (optional).",
-                    },
-                },
-                "required": ["videos_dir"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "pack_transcripts",
-            "description": (
-                "Pack all per-source transcript JSONs in edit/transcripts/ into "
-                "takes_packed.md — the primary phrase-level reading surface for cut decisions."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "edit_dir": {
-                        "type": "string",
-                        "description": "Edit output directory containing transcripts/ subdirectory.",
-                    },
-                    "silence_threshold": {
-                        "type": "number",
-                        "description": "Silence gap in seconds that triggers a phrase break (default 0.5).",
-                    },
-                },
-                "required": ["edit_dir"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "timeline_view",
-            "description": (
-                "Generate a filmstrip + waveform PNG for a time range of a video. "
-                "Use at decision points (ambiguous pauses, retake comparison, cut-point "
-                "sanity checks). NOT a scan tool — call only when you need a visual check."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "video_path": {
-                        "type": "string",
-                        "description": "Absolute path to the video file.",
-                    },
-                    "start": {
-                        "type": "number",
-                        "description": "Start time in seconds.",
-                    },
-                    "end": {
-                        "type": "number",
-                        "description": "End time in seconds.",
-                    },
-                    "n_frames": {
-                        "type": "integer",
-                        "description": "Number of filmstrip frames to extract (default 8).",
-                    },
-                    "transcript_path": {
-                        "type": "string",
-                        "description": "Optional path to a transcript JSON for word label overlay.",
-                    },
-                },
-                "required": ["video_path", "start", "end"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "render",
-            "description": (
-                "Render a video from an EDL (edit decision list JSON). "
-                "Runs the full pipeline: per-segment extract with grade + 30ms audio fades → "
-                "lossless concat → overlays (PTS-shifted) → subtitles LAST → loudnorm."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "edl_path": {
-                        "type": "string",
-                        "description": "Absolute path to edl.json.",
-                    },
-                    "output_path": {
-                        "type": "string",
-                        "description": "Output video path (e.g. edit/final.mp4).",
-                    },
-                    "preview": {
-                        "type": "boolean",
-                        "description": "Preview mode: 1080p, CRF 22, faster encode.",
-                    },
-                    "build_subtitles": {
-                        "type": "boolean",
-                        "description": "Build master.srt from transcripts + EDL timeline offsets.",
-                    },
-                    "no_subtitles": {
-                        "type": "boolean",
-                        "description": "Skip subtitles even if the EDL references one.",
-                    },
-                    "no_loudnorm": {
-                        "type": "boolean",
-                        "description": "Skip audio loudness normalization (default: on, -14 LUFS).",
-                    },
-                },
-                "required": ["edl_path", "output_path"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "grade",
-            "description": (
-                "Apply a color grade to a video via ffmpeg filter chain. "
-                "Presets: subtle, neutral_punch, warm_cinematic, none. "
-                "Omit both preset and filter for auto mode (data-driven per-clip correction)."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "input_path": {
-                        "type": "string",
-                        "description": "Input video path.",
-                    },
-                    "output_path": {
-                        "type": "string",
-                        "description": "Output video path.",
-                    },
-                    "preset": {
-                        "type": "string",
-                        "description": "Grade preset name.",
-                        "enum": ["subtle", "neutral_punch", "warm_cinematic", "none"],
-                    },
-                    "filter": {
-                        "type": "string",
-                        "description": "Raw ffmpeg filter string (overrides preset).",
-                    },
-                },
-                "required": ["input_path", "output_path"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "bash",
-            "description": (
-                "Run a shell command. Use for ffprobe, yt-dlp, file listing, "
-                "ffmpeg one-offs, and other system tasks the other tools don't cover."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "command": {
-                        "type": "string",
-                        "description": "Shell command to execute.",
-                    },
-                },
-                "required": ["command"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "read_file",
-            "description": "Read the contents of a text file (takes_packed.md, project.md, edl.json, transcripts, etc.).",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "path": {
-                        "type": "string",
-                        "description": "Absolute path to the file.",
-                    },
-                },
-                "required": ["path"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "write_file",
-            "description": "Write or append content to a file (edl.json, project.md, etc.).",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "path": {
-                        "type": "string",
-                        "description": "Absolute path to the file.",
-                    },
-                    "content": {
-                        "type": "string",
-                        "description": "Content to write.",
-                    },
-                    "append": {
-                        "type": "boolean",
-                        "description": "If true, append to existing file instead of overwriting.",
-                    },
-                },
-                "required": ["path", "content"],
-            },
-        },
-    },
-]
+
+def _is_under(path: Path, parent: Path) -> bool:
+    """Return True if *path* is the same as or nested under *parent*."""
+    try:
+        path.resolve().relative_to(parent.resolve())
+        return True
+    except ValueError:
+        return False
 
 
 # ---------------------------------------------------------------------------
-# Tool dispatch
+# Helpers runner
 # ---------------------------------------------------------------------------
 
 
@@ -367,172 +116,22 @@ def _format_result(returncode: int, stdout: str, stderr: str) -> str:
         parts.append("(no output)" if returncode == 0 else f"[exit {returncode}] (no output)")
     if returncode != 0:
         parts.insert(0, f"[exit code {returncode}]")
-    return "\n".join(parts)
-
-
-def _is_under(path: Path, parent: Path) -> bool:
-    """Return True if *path* is the same as or nested under *parent*."""
-    try:
-        path.relative_to(parent.resolve())
-        return True
-    except ValueError:
-        return False
-
-
-def dispatch_tool(
-    name: str,
-    args: dict,
-    videos_dir: Path,
-    edit_dir: Path,
-    enable_bash: bool = False,
-) -> tuple[str, Path | None]:
-    """Execute a tool call. Returns (result_text, optional_image_path)."""
-
-    if name == "transcribe":
-        video_path = args["video_path"]
-        cmd = [str(HELPERS_DIR / "transcribe.py"), video_path]
-        if args.get("edit_dir"):
-            cmd += ["--edit-dir", args["edit_dir"]]
-        else:
-            cmd += ["--edit-dir", str(edit_dir)]
-        if args.get("language"):
-            cmd += ["--language", args["language"]]
-        if args.get("num_speakers"):
-            cmd += ["--num-speakers", str(args["num_speakers"])]
-        rc, out, err = _run_helper(cmd)
-        return _format_result(rc, out, err), None
-
-    if name == "transcribe_batch":
-        # Always use the session videos_dir to prevent operating outside it
-        cmd = [str(HELPERS_DIR / "transcribe_batch.py"), str(videos_dir)]
-        if args.get("edit_dir"):
-            cmd += ["--edit-dir", args["edit_dir"]]
-        if args.get("workers"):
-            cmd += ["--workers", str(args["workers"])]
-        if args.get("num_speakers"):
-            cmd += ["--num-speakers", str(args["num_speakers"])]
-        rc, out, err = _run_helper(cmd)
-        return _format_result(rc, out, err), None
-
-    if name == "pack_transcripts":
-        cmd = [str(HELPERS_DIR / "pack_transcripts.py"), "--edit-dir", args.get("edit_dir") or str(edit_dir)]
-        if args.get("silence_threshold") is not None:
-            cmd += ["--silence-threshold", str(args["silence_threshold"])]
-        rc, out, err = _run_helper(cmd)
-        return _format_result(rc, out, err), None
-
-    if name == "timeline_view":
-        video_path = Path(args["video_path"])
-        start = args["start"]
-        end = args["end"]
-        verify_dir = edit_dir / "verify"
-        verify_dir.mkdir(parents=True, exist_ok=True)
-        out_img = verify_dir / f"timeline_{video_path.stem}_{start:.2f}_{end:.2f}.png"
-        cmd = [
-            str(HELPERS_DIR / "timeline_view.py"),
-            str(video_path),
-            str(start),
-            str(end),
-            "-o", str(out_img),
-        ]
-        if args.get("n_frames"):
-            cmd += ["--n-frames", str(args["n_frames"])]
-        if args.get("transcript_path"):
-            cmd += ["--transcript", args["transcript_path"]]
-        rc, out, err = _run_helper(cmd)
-        result = _format_result(rc, out, err)
-        if rc == 0 and out_img.exists():
-            result += f"\nImage saved to: {out_img}"
-            return result, out_img
-        return result, None
-
-    if name == "render":
-        cmd = [
-            str(HELPERS_DIR / "render.py"),
-            args["edl_path"],
-            "-o", args["output_path"],
-        ]
-        if args.get("preview"):
-            cmd.append("--preview")
-        if args.get("build_subtitles"):
-            cmd.append("--build-subtitles")
-        if args.get("no_subtitles"):
-            cmd.append("--no-subtitles")
-        if args.get("no_loudnorm"):
-            cmd.append("--no-loudnorm")
-        rc, out, err = _run_helper(cmd)
-        return _format_result(rc, out, err), None
-
-    if name == "grade":
-        cmd = [
-            str(HELPERS_DIR / "grade.py"),
-            args["input_path"],
-            "-o", args["output_path"],
-        ]
-        if args.get("filter"):
-            cmd += ["--filter", args["filter"]]
-        elif args.get("preset"):
-            cmd += ["--preset", args["preset"]]
-        rc, out, err = _run_helper(cmd)
-        return _format_result(rc, out, err), None
-
-    if name == "bash":
-        if not enable_bash:
-            return (
-                "[bash tool is disabled by default. Restart the orchestrator with "
-                "--enable-bash to allow shell commands.]",
-                None,
-            )
-        command = args["command"]
-        proc = subprocess.run(
-            command,
-            shell=True,
-            capture_output=True,
-            text=True,
-            timeout=300,
-        )
-        return _format_result(proc.returncode, proc.stdout, proc.stderr), None
-
-    if name == "read_file":
-        path = Path(args["path"]).resolve()
-        if not (_is_under(path, videos_dir) or _is_under(path, edit_dir)):
-            return f"Access denied: path must be under {videos_dir} or {edit_dir}", None
-        if not path.exists():
-            return f"File not found: {path}", None
-        try:
-            return path.read_text(), None
-        except Exception as e:
-            return f"Error reading file: {e}", None
-
-    if name == "write_file":
-        path = Path(args["path"]).resolve()
-        if not _is_under(path, edit_dir):
-            return f"Access denied: write path must be under {edit_dir}", None
-        path.parent.mkdir(parents=True, exist_ok=True)
-        mode = "a" if args.get("append") else "w"
-        try:
-            with open(path, mode) as f:
-                f.write(args["content"])
-            return f"Written to {path}", None
-        except Exception as e:
-            return f"Error writing file: {e}", None
-
-    return f"Unknown tool: {name}", None
+    result = "\n".join(parts)
+    if len(result) > MAX_TOOL_RESULT_LENGTH:
+        result = result[:MAX_TOOL_RESULT_LENGTH] + "\n... [truncated]"
+    return result
 
 
 # ---------------------------------------------------------------------------
-# Session loop
+# Image attachment helper
 # ---------------------------------------------------------------------------
 
 
-# Maximum image size (bytes) to embed in chat; larger images are downscaled first.
-MAX_IMAGE_BYTES = 1_500_000  # 1.5 MB
-
-
-def _build_image_message(img_path: Path) -> dict:
-    """Embed a timeline image as a base64 data URL, downscaling via ffmpeg if needed."""
+def _prepare_image_attachment(img_path: Path) -> dict:
+    """Return a blob attachment dict, downscaling via ffmpeg if > MAX_IMAGE_BYTES."""
     raw = img_path.read_bytes()
     mime = "image/png"
+
     if len(raw) > MAX_IMAGE_BYTES:
         with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
             tmp_path = Path(tmp.name)
@@ -552,212 +151,397 @@ def _build_image_message(img_path: Path) -> dict:
             pass
         finally:
             tmp_path.unlink(missing_ok=True)
+
     if len(raw) > MAX_IMAGE_BYTES:
-        return {
-            "role": "user",
-            "content": (
-                f"[Timeline image too large to embed ({len(raw):,} bytes); "
-                f"saved to: {img_path}]"
-            ),
-        }
-    b64 = base64.b64encode(raw).decode()
+        return {}  # too large — caller will skip attachment
+
     return {
-        "role": "user",
-        "content": [
-            {"type": "text", "text": f"[Timeline view image: {img_path.name}]"},
-            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
-        ],
+        "type": "blob",
+        "data": base64.b64encode(raw).decode(),
+        "mimeType": mime,
     }
 
 
-def run_session(
+# ---------------------------------------------------------------------------
+# Session loop
+# ---------------------------------------------------------------------------
+
+
+async def run_session(
     videos_dir: Path,
     model: str,
-    endpoint: str,
+    enable_shell: bool,
     max_turns: int,
-    enable_bash: bool = False,
 ) -> None:
     try:
-        from openai import OpenAI
+        from copilot import CopilotClient, SubprocessConfig, define_tool
+        from copilot.session import PermissionRequestResult
+        from pydantic import BaseModel, Field
     except ImportError:
         sys.exit(
-            "openai package not found.\n"
+            "Required packages not found.\n"
             "Install with:  pip install -e \".[copilot]\"\n"
-            "or:            pip install openai"
+            "or:            pip install github-copilot-sdk pydantic"
         )
 
     _load_env_file()
-    github_token = os.environ.get("GITHUB_TOKEN", "").strip()
-    if not github_token:
-        sys.exit(
-            "GITHUB_TOKEN is not set.\n"
-            "Export a Personal Access Token with the 'copilot' scope:\n"
-            "  export GITHUB_TOKEN=github_pat_..."
-        )
-
-    client = OpenAI(base_url=endpoint, api_key=github_token)
 
     edit_dir = videos_dir / "edit"
     edit_dir.mkdir(parents=True, exist_ok=True)
 
-    # Build system prompt with working-directory context injected at the end
-    system_prompt = load_skill_prompt()
-    system_prompt += (
-        f"\n\n## Session context\n\n"
-        f"- Videos directory: `{videos_dir}`\n"
-        f"- Edit directory: `{edit_dir}`\n"
-        f"- Helpers directory: `{HELPERS_DIR}`\n"
-        f"- All session outputs must go to `{edit_dir}/` (Hard Rule 12).\n"
+    # ------------------------------------------------------------------
+    # Tool parameter models
+    # ------------------------------------------------------------------
+
+    class TranscribeParams(BaseModel):
+        video_path: str = Field(description="Absolute path to the video file.")
+        language: Optional[str] = Field(default=None, description="ISO language code (e.g. 'en'). Omit to auto-detect.")
+        num_speakers: Optional[int] = Field(default=None, description="Number of speakers for diarization.")
+
+    class TranscribeBatchParams(BaseModel):
+        workers: Optional[int] = Field(default=None, description="Parallel workers (default 4).")
+        num_speakers: Optional[int] = Field(default=None, description="Number of speakers (optional).")
+
+    class PackTranscriptsParams(BaseModel):
+        silence_threshold: Optional[float] = Field(default=None, description="Silence gap in seconds that triggers a phrase break (default 0.5).")
+
+    class TimelineViewParams(BaseModel):
+        video_path: str = Field(description="Absolute path to the video file.")
+        start: float = Field(description="Start time in seconds.")
+        end: float = Field(description="End time in seconds.")
+        n_frames: Optional[int] = Field(default=None, description="Number of filmstrip frames to extract (default 8).")
+        transcript_path: Optional[str] = Field(default=None, description="Optional path to transcript JSON for word label overlay.")
+
+    class RenderParams(BaseModel):
+        edl_path: str = Field(description="Absolute path to edl.json.")
+        output_path: str = Field(description="Output video path (e.g. edit/final.mp4).")
+        preview: Optional[bool] = Field(default=None, description="Preview mode: 1080p, CRF 22, faster encode.")
+        build_subtitles: Optional[bool] = Field(default=None, description="Build master.srt from transcripts + EDL timeline offsets.")
+        no_subtitles: Optional[bool] = Field(default=None, description="Skip subtitles even if the EDL references one.")
+        no_loudnorm: Optional[bool] = Field(default=None, description="Skip audio loudness normalization.")
+
+    class GradeParams(BaseModel):
+        input_path: str = Field(description="Input video path.")
+        output_path: str = Field(description="Output video path.")
+        preset: Optional[str] = Field(default=None, description="Grade preset: subtle, neutral_punch, warm_cinematic, none.")
+        filter: Optional[str] = Field(default=None, description="Raw ffmpeg filter string (overrides preset).")
+
+    # ------------------------------------------------------------------
+    # Tool implementations
+    # ------------------------------------------------------------------
+
+    @define_tool(
+        description=(
+            "Transcribe a single video with ElevenLabs Scribe. "
+            "Writes word-level transcript JSON to edit/transcripts/<stem>.json. "
+            "Cached — skips upload if the JSON already exists."
+        ),
+        skip_permission=True,
+    )
+    async def transcribe(params: TranscribeParams) -> str:
+        # --edit-dir is always the session edit_dir; only the optional flags vary per call.
+        argv = [str(HELPERS_DIR / "transcribe.py"), params.video_path, "--edit-dir", str(edit_dir)]
+        if params.language:
+            argv.extend(["--language", params.language])
+        if params.num_speakers:
+            argv.extend(["--num-speakers", str(params.num_speakers)])
+        exit_code, stdout, stderr = _run_helper(argv)
+        return _format_result(exit_code, stdout, stderr)
+
+    @define_tool(
+        description=(
+            "Batch-transcribe every video in the session videos directory using parallel workers. "
+            "Cached per source — already-transcribed files are skipped."
+        ),
+        skip_permission=True,
     )
+    async def transcribe_batch(params: TranscribeBatchParams) -> str:
+        # The source directory and --edit-dir both come from the session, not from
+        # the tool parameters, so the model cannot point the batch run elsewhere.
+        argv = [str(HELPERS_DIR / "transcribe_batch.py"), str(videos_dir), "--edit-dir", str(edit_dir)]
+        if params.workers:
+            argv.extend(["--workers", str(params.workers)])
+        if params.num_speakers:
+            argv.extend(["--num-speakers", str(params.num_speakers)])
+        exit_code, stdout, stderr = _run_helper(argv)
+        return _format_result(exit_code, stdout, stderr)
 
-    messages: list[dict] = [{"role": "system", "content": system_prompt}]
+    @define_tool(
+        description=(
+            "Pack all per-source transcript JSONs in edit/transcripts/ into "
+            "takes_packed.md — the primary phrase-level reading surface for cut decisions."
+        ),
+        skip_permission=True,
+    )
+    async def pack_transcripts(params: PackTranscriptsParams) -> str:
+        argv = [str(HELPERS_DIR / "pack_transcripts.py"), "--edit-dir", str(edit_dir)]
+        if params.silence_threshold is not None:  # `is not None`, so an explicit 0 is still forwarded
+            argv.extend(["--silence-threshold", str(params.silence_threshold)])
+        exit_code, stdout, stderr = _run_helper(argv)
+        return _format_result(exit_code, stdout, stderr)
 
-    # Seed with prior session memory if available
-    project_md = edit_dir / "project.md"
-    if project_md.exists():
-        prior = project_md.read_text().strip()
-        if prior:
-            messages.append({
-                "role": "user",
-                "content": (
-                    f"[Prior session memory — project.md]\n\n{prior}\n\n---\n"
-                    "I'm back. What should we pick up from or start fresh on?"
-                ),
-            })
-            messages.append({
-                "role": "assistant",
-                "content": (
-                    "I've reviewed the session notes above. Ready when you are — "
-                    "just tell me what you'd like to work on."
-                ),
-            })
-
-    print(f"\nvideo-use — GitHub Copilot orchestrator")
-    print(f"  model:    {model}")
-    print(f"  endpoint: {endpoint}")
-    print(f"  videos:   {videos_dir}")
-    print("Type your message. Enter 'exit' or press Ctrl+C to quit.\n")
+    # Side-channel queue of timeline images produced by tool calls; they are attached to
+    # the next user message (the SDK attachment API goes on session.send, not tool results).
+    _pending_images: list[Path] = []
 
-    # Prompt for the first user message
-    try:
-        first_input = input("You: ").strip()
-    except (EOFError, KeyboardInterrupt):
-        print("\nBye.")
-        return
+    @define_tool(
+        description=(
+            "Generate a filmstrip + waveform PNG for a time range of a video. "
+            "Use at decision points (ambiguous pauses, retake comparison, cut-point "
+            "sanity checks). NOT a scan tool — call only when you need a visual check."
+        ),
+        skip_permission=True,
+    )
+    async def timeline_view(params: TimelineViewParams) -> str:
+        # Writes the PNG under edit_dir/verify and returns the helper's formatted output.
+        video_path = Path(params.video_path)
+        verify_dir = edit_dir / "verify"
+        verify_dir.mkdir(parents=True, exist_ok=True)
+        out_img = verify_dir / f"timeline_{video_path.stem}_{params.start:.2f}_{params.end:.2f}.png"
+        cmd = [
+            str(HELPERS_DIR / "timeline_view.py"),
+            str(video_path), str(params.start), str(params.end),
+            "-o", str(out_img),
+        ]
+        if params.n_frames:
+            cmd += ["--n-frames", str(params.n_frames)]
+        if params.transcript_path:
+            cmd += ["--transcript", params.transcript_path]
+        rc, out, err = _run_helper(cmd)
+        result = _format_result(rc, out, err)
+        if rc == 0 and out_img.exists():
+            # Queued, not returned: the image is sent with the next user message.
+            _pending_images.append(out_img)
+            result += f"\nImage saved to: {out_img} (will be attached to the user's next message)"
+        return result
+
+    @define_tool(
+        description=(
+            "Render a video from an EDL (edit decision list JSON). "
+            "Runs the full pipeline: per-segment extract with grade + 30ms audio fades → "
+            "lossless concat → overlays → subtitles LAST → loudnorm."
+        ),
+        skip_permission=True,
+    )
+    async def render(params: RenderParams) -> str:
+        # NOTE(review): output_path is forwarded unchecked and skip_permission=True, so
+        # nothing in this tool confines the output to edit_dir — confirm that is intended.
+        argv = [
+            str(HELPERS_DIR / "render.py"), params.edl_path, "-o", params.output_path,
+        ]
+        for enabled, flag in (
+            (params.preview, "--preview"),
+            (params.build_subtitles, "--build-subtitles"),
+            (params.no_subtitles, "--no-subtitles"),
+            (params.no_loudnorm, "--no-loudnorm"),
+        ):
+            if enabled:
+                argv.append(flag)
+        exit_code, stdout, stderr = _run_helper(argv)
+        return _format_result(exit_code, stdout, stderr)
+
+    @define_tool(
+        description=(
+            "Apply a color grade to a video via ffmpeg filter chain. "
+            "Presets: subtle, neutral_punch, warm_cinematic, none. "
+            "Omit both preset and filter for auto mode (data-driven per-clip correction)."
+        ),
+        skip_permission=True,
+    )
+    async def grade(params: GradeParams) -> str:
+        # A raw filter takes precedence over a preset; with neither set, grade.py is
+        # invoked with no grading flag at all.
+        argv = [
+            str(HELPERS_DIR / "grade.py"), params.input_path, "-o", params.output_path,
+        ]
+        if params.filter:
+            argv.extend(["--filter", params.filter])
+        elif params.preset:
+            argv.extend(["--preset", params.preset])
+        exit_code, stdout, stderr = _run_helper(argv)
+        return _format_result(exit_code, stdout, stderr)
 
-    if not first_input or first_input.lower() in ("exit", "quit", "q"):
-        print("Bye.")
-        return
+    # ------------------------------------------------------------------
+    # Permission handler — sandboxes file writes to edit_dir; shell off by default
+    # ------------------------------------------------------------------
 
-    messages.append({"role": "user", "content": first_input})
+    def on_permission_request(request, invocation) -> "PermissionRequestResult":
+        kind = request.kind.value if hasattr(request.kind, "value") else str(request.kind)
 
-    turn = 0
-    while turn < max_turns:
+        if kind == "shell" and not enable_shell:
+            print(
+                "\n[shell tool blocked — restart with --enable-shell to allow shell commands]",
+                flush=True,
+            )
+            return PermissionRequestResult(kind="denied-interactively-by-user")
+        # Fail closed: a write request with no file name cannot be checked against edit_dir.
+        if kind == "write":
+            file_name = getattr(request, "file_name", None) or ""
+            if not file_name or not _is_under(Path(file_name), edit_dir):
+                print(f"\n[write blocked — path outside edit_dir: {file_name or '<no path given>'}]", flush=True)
+                return PermissionRequestResult(kind="denied-by-rules")
+
+        return PermissionRequestResult(kind="approved")
+
+    # ------------------------------------------------------------------
+    # User input handler (enables ask_user tool in the CLI)
+    # ------------------------------------------------------------------
+
+    async def on_user_input_request(request, invocation) -> dict:
+        question = request.get("question", "")
+        choices = request.get("choices")
+        print(f"\nAssistant asks: {question}")
+        if choices:
+            for i, c in enumerate(choices, 1):
+                print(f"  {i}. {c}")
         try:
-            response = client.chat.completions.create(
-                model=model,
-                messages=messages,
-                tools=TOOLS,
-                max_tokens=4096,
+            answer = await asyncio.get_event_loop().run_in_executor(
+                None, lambda: input("Your answer: ").strip()
             )
-        except KeyboardInterrupt:
-            print("\n[Interrupted]")
-            break
-        except Exception as e:
-            print(f"\n[API error: {e}]")
-            break
-
-        choice = response.choices[0]
-        message = choice.message
-
-        # Serialize the assistant message back into the history
-        msg_dict: dict = {"role": "assistant", "content": message.content}
-        if message.tool_calls:
-            msg_dict["tool_calls"] = [
-                {
-                    "id": tc.id,
-                    "type": "function",
-                    "function": {
-                        "name": tc.function.name,
-                        "arguments": tc.function.arguments,
-                    },
-                }
-                for tc in message.tool_calls
-            ]
-        messages.append(msg_dict)
-
-        if message.tool_calls:
-            # Show any explanation the model included alongside the tool calls
-            if message.content:
-                print(f"\nAssistant: {message.content}\n")
-
-            # Execute every requested tool call
-            image_paths: list[Path] = []
-
-            for tc in message.tool_calls:
-                tool_name = tc.function.name
-                try:
-                    tool_args = json.loads(tc.function.arguments)
-                except json.JSONDecodeError:
-                    messages.append({
-                        "role": "tool",
-                        "tool_call_id": tc.id,
-                        "content": (
-                            f"[Invalid JSON in tool arguments — could not parse. "
-                            f"Raw arguments: {tc.function.arguments!r}. Please retry with valid JSON.]"
-                        ),
-                    })
-                    continue
-
-                # Pretty-print what we're doing
-                args_preview = ", ".join(
-                    f"{k}={v!r}" for k, v in list(tool_args.items())[:3]
-                )
-                print(f"  [tool] {tool_name}({args_preview})", flush=True)
+        except (EOFError, KeyboardInterrupt):
+            answer = ""
+        return {"answer": answer, "wasFreeform": True}
 
-                result_text, image_path = dispatch_tool(
-                    tool_name, tool_args, videos_dir, edit_dir, enable_bash=enable_bash
-                )
+    # ------------------------------------------------------------------
+    # Build system prompt
+    # ------------------------------------------------------------------
 
-                # Truncate very long results so we don't blow the context window
-                if len(result_text) > MAX_TOOL_RESULT_LENGTH:
-                    result_text = result_text[:MAX_TOOL_RESULT_LENGTH] + "\n... [truncated]"
+    system_content = (
+        load_skill_prompt()
+        + f"\n\n## Session context\n\n"
+        f"- Videos directory: `{videos_dir}`\n"
+        f"- Edit directory: `{edit_dir}`\n"
+        f"- Helpers directory: `{HELPERS_DIR}`\n"
+        f"- All session outputs must go to `{edit_dir}/` (Hard Rule 12).\n"
+    )
 
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": tc.id,
-                    "content": result_text,
-                })
+    # ------------------------------------------------------------------
+    # Print banner
+    # ------------------------------------------------------------------
 
-                if image_path and image_path.exists():
-                    image_paths.append(image_path)
+    print(f"\nvideo-use — GitHub Copilot SDK orchestrator")
+    print(f"  model:    {model or 'auto (Copilot selects)'}")
+    print(f"  videos:   {videos_dir}")
+    print(f"  shell:    {'enabled' if enable_shell else 'disabled  (--enable-shell to allow)'}")
+    print("Type your message. Enter 'exit' or press Ctrl+C to quit.\n")
 
-            # Inject timeline view images as user messages so vision-capable
-            # models (gpt-4o, etc.) can reason about them
-            for img_path in image_paths:
-                messages.append(_build_image_message(img_path))
+    # ------------------------------------------------------------------
+    # Prior session memory
+    # ------------------------------------------------------------------
 
-            turn += 1
-            continue  # Let the model respond to the tool results
+    project_md = edit_dir / "project.md"
+    initial_context: str | None = None
+    if project_md.exists():
+        prior = project_md.read_text().strip()
+        if prior:
+            initial_context = (
+                f"[Prior session memory — project.md]\n\n{prior}\n\n---\n"
+                "I'm back. What should we pick up from or start fresh on?"
+            )
 
-        # No tool calls — conversational turn
-        if message.content:
-            print(f"\nAssistant: {message.content}\n")
+    # ------------------------------------------------------------------
+    # SDK client + session
+    # ------------------------------------------------------------------
 
-        if choice.finish_reason == "stop":
-            try:
-                user_input = input("You: ").strip()
-            except (EOFError, KeyboardInterrupt):
-                print("\nBye.")
-                break
+    github_token = os.environ.get("GITHUB_TOKEN", "").strip() or None
+    config = SubprocessConfig(
+        cwd=str(videos_dir),
+        github_token=github_token,
+    )
 
-            if not user_input or user_input.lower() in ("exit", "quit", "q"):
-                print("Bye.")
-                break
+    session_kwargs: dict = dict(
+        on_permission_request=on_permission_request,
+        on_user_input_request=on_user_input_request,
+        tools=[transcribe, transcribe_batch, pack_transcripts, timeline_view, render, grade],
+        system_message={"content": system_content},
+        streaming=True,
+    )
+    if model:
+        session_kwargs["model"] = model
 
-            messages.append({"role": "user", "content": user_input})
+    try:
+        from copilot.generated.session_events import (
+            AssistantMessageData,
+            AssistantMessageDeltaData,
+            SessionIdleData,
+        )
+    except ImportError:
+        # Older SDK versions may use a different import path
+        from copilot.session_events import (  # type: ignore[no-redef]
+            AssistantMessageData,
+            AssistantMessageDeltaData,
+            SessionIdleData,
+        )
 
-        turn += 1
+    async with CopilotClient(config) as client:
+        async with await client.create_session(**session_kwargs) as session:
+
+            # Seed prior session memory as first user turn
+            if initial_context:
+                seed_done = asyncio.Event()
+
+                def _on_seed(event):
+                    # Complete only on idle, like the main loop's handler: an
+                    # assistant message may arrive before the turn has finished,
+                    # and the next send must not start until the session is idle.
+                    if isinstance(event.data, SessionIdleData):
+                        seed_done.set()
+
+                unsub_seed = session.on(_on_seed)
+                await session.send(initial_context)
+                await seed_done.wait()
+                unsub_seed()
+                print()
+
+            turn = 0
+            while turn < max_turns:
+                # Collect any pending timeline images
+                attachments: list[dict] = []
+                while _pending_images:
+                    img_path = _pending_images.pop(0)
+                    if img_path.exists():
+                        att = _prepare_image_attachment(img_path)
+                        if att:
+                            attachments.append(att)
+
+                # Prompt user
+                try:
+                    user_input = await asyncio.get_event_loop().run_in_executor(
+                        None, lambda: input("You: ").strip()
+                    )
+                except (EOFError, KeyboardInterrupt):
+                    print("\nBye.")
+                    break
+
+                if not user_input or user_input.lower() in ("exit", "quit", "q"):
+                    print("Bye.")
+                    break
+
+                # Wait for full response
+                response_done = asyncio.Event()
+                print("\nAssistant: ", end="", flush=True)
+
+                def on_event(event):
+                    match event.data:
+                        case AssistantMessageDeltaData() as data:
+                            delta = data.delta_content or ""
+                            print(delta, end="", flush=True)
+                        case AssistantMessageData():
+                            print()  # ensure newline after full message
+                        case SessionIdleData():
+                            response_done.set()
+
+                unsub = session.on(on_event)
+                send_kwargs: dict = {"prompt": user_input}
+                if attachments:
+                    send_kwargs["attachments"] = attachments
+
+                await session.send(**send_kwargs)
+                await response_done.wait()
+                unsub()
+                print()
+
+                turn += 1
 
     if turn >= max_turns:
         print(f"\n[Reached max_turns={max_turns}. Session ended.]")
@@ -770,20 +554,22 @@ def run_session(
 
 def main() -> None:
     ap = argparse.ArgumentParser(
-        description="GitHub Copilot-backed video editing orchestrator for video-use.",
+        description="GitHub Copilot SDK video editing orchestrator for video-use.",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog=(
-            "Environment variables:\n"
-            "  GITHUB_TOKEN       Personal Access Token with 'copilot' scope (required)\n"
-            "  ELEVENLABS_API_KEY ElevenLabs API key for transcription (required for transcribe tools)\n"
-            "\nModel options (via GitHub Copilot):\n"
-            "  claude-opus-4-7    Default — Anthropic Claude Opus 4.7, strong reasoning + vision\n"
-            "  claude-sonnet-4-5  Anthropic Claude Sonnet 4.5 — faster, lighter\n"
-            "  gpt-4o             OpenAI GPT-4o — strong reasoning, vision support\n"
-            "  gpt-4o-mini        OpenAI GPT-4o mini — fastest OpenAI option\n"
-            "  o3-mini            OpenAI o3-mini — reasoning model\n"
-            "\nAlternative endpoint (GitHub Models free tier):\n"
-            "  --endpoint https://models.inference.ai.azure.com\n"
+            "Authentication (pick one):\n"
+            "  copilot auth login              Sign in via browser — no token needed\n"
+            "  GITHUB_TOKEN=... in .env        PAT with 'copilot' scope\n"
+            "    https://github.com/settings/tokens\n"
+            "\nModel options (via GitHub Copilot CLI — use /model inside session to switch):\n"
+            "  (omit --model)                  Copilot auto-selects the best model\n"
+            "  claude-opus-4.5                 Anthropic Claude Opus 4.5 — complex tasks\n"
+            "  claude-sonnet-4.5               Anthropic Claude Sonnet 4.5 — faster\n"
+            "  gpt-5                           OpenAI GPT-5\n"
+            "  gpt-4.1                         OpenAI GPT-4.1\n"
+            "\nEnvironment variables:\n"
+            "  GITHUB_TOKEN        PAT with 'copilot' scope (alternative to browser login)\n"
+            "  ELEVENLABS_API_KEY  ElevenLabs API key for transcription\n"
         ),
     )
     ap.add_argument(
@@ -793,32 +579,23 @@ def main() -> None:
     )
     ap.add_argument(
         "--model",
-        default="claude-opus-4-7",
-        help="Model identifier for the Copilot API (default: claude-opus-4-7).",
+        default="",
+        help="Model identifier (default: Copilot auto-selects). Use /model inside session to switch.",
     )
     ap.add_argument(
-        "--endpoint",
-        default="https://api.githubcopilot.com",
+        "--enable-shell",
+        action="store_true",
+        default=False,
         help=(
-            "GitHub Copilot API base URL "
-            "(default: https://api.githubcopilot.com). "
-            "Use https://models.inference.ai.azure.com for GitHub Models."
+            "Enable the built-in shell tool (disabled by default). "
+            "Only enable when you trust the model and understand the security implications."
         ),
     )
     ap.add_argument(
         "--max-turns",
         type=int,
-        default=100,
-        help="Maximum LLM turns before the session ends (default: 100).",
-    )
-    ap.add_argument(
-        "--enable-bash",
-        action="store_true",
-        default=False,
-        help=(
-            "Enable the bash tool (disabled by default). "
-            "Only enable when you trust the model and understand the security implications."
-        ),
+        default=200,
+        help="Maximum interactive turns before the session ends (default: 200).",
     )
     args = ap.parse_args()
 
@@ -826,12 +603,13 @@ def main() -> None:
     if not videos_dir.is_dir():
         sys.exit(f"Not a directory: {videos_dir}")
 
-    run_session(
-        videos_dir=videos_dir,
-        model=args.model,
-        endpoint=args.endpoint,
-        max_turns=args.max_turns,
-        enable_bash=args.enable_bash,
+    asyncio.run(
+        run_session(
+            videos_dir=videos_dir,
+            model=args.model,
+            enable_shell=args.enable_shell,
+            max_turns=args.max_turns,
+        )
     )
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 296cad6..211866a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
 
 [project.optional-dependencies]
 animations = ["manim"]
-copilot = ["openai>=1.0"]
+copilot = ["github-copilot-sdk", "pydantic>=2.0"]
 
 [build-system]
 requires = ["setuptools>=61.0"]

From 9a7d73d32f0cc52c7545dba274f9eaae40488d64 Mon Sep 17 00:00:00 2001
From: delgerskhn <delger.bayanmunkh6@gmail.com>
Date: Wed, 22 Apr 2026 19:31:51 +0800
Subject: [PATCH 5/8] fix: move Pydantic models to module level to fix
 NameError on startup

define_tool calls get_type_hints(), which resolves annotations from the
module's global namespace. Models defined inside run_session() are
invisible to it, causing NameError: name 'TranscribeParams' is not defined.

Also move the copilot/pydantic imports to module level with a graceful
fallback: a failed import no longer raises at import time; instead,
run_session() exits via sys.exit with installation instructions.
---
 orchestrator.py | 96 +++++++++++++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 42 deletions(-)

diff --git a/orchestrator.py b/orchestrator.py
index 64b1a89..c01fc05 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -33,6 +33,15 @@
 from pathlib import Path
 from typing import Optional
 
+try:
+    from copilot import CopilotClient, SubprocessConfig, define_tool
+    from copilot.session import PermissionRequestResult
+    from pydantic import BaseModel, Field
+except ImportError:  # deferred error — shown at runtime with a friendly message
+    CopilotClient = SubprocessConfig = define_tool = PermissionRequestResult = None  # type: ignore
+    BaseModel = object  # type: ignore
+    Field = lambda **_: None  # type: ignore
+
 # ---------------------------------------------------------------------------
 # Repo-relative paths
 # ---------------------------------------------------------------------------
@@ -162,6 +171,50 @@ def _prepare_image_attachment(img_path: Path) -> dict:
     }
 
 
+# ---------------------------------------------------------------------------
+# Tool parameter models (module-level so get_type_hints() can resolve them)
+# ---------------------------------------------------------------------------
+
+
+class TranscribeParams(BaseModel):
+    video_path: str = Field(description="Absolute path to the video file.")
+    language: Optional[str] = Field(default=None, description="ISO language code (e.g. 'en'). Omit to auto-detect.")
+    num_speakers: Optional[int] = Field(default=None, description="Number of speakers for diarization.")
+
+
+class TranscribeBatchParams(BaseModel):
+    workers: Optional[int] = Field(default=None, description="Parallel workers (default 4).")
+    num_speakers: Optional[int] = Field(default=None, description="Number of speakers (optional).")
+
+
+class PackTranscriptsParams(BaseModel):
+    silence_threshold: Optional[float] = Field(default=None, description="Silence gap in seconds that triggers a phrase break (default 0.5).")
+
+
+class TimelineViewParams(BaseModel):
+    video_path: str = Field(description="Absolute path to the video file.")
+    start: float = Field(description="Start time in seconds.")
+    end: float = Field(description="End time in seconds.")
+    n_frames: Optional[int] = Field(default=None, description="Number of filmstrip frames to extract (default 8).")
+    transcript_path: Optional[str] = Field(default=None, description="Optional path to transcript JSON for word label overlay.")
+
+
+class RenderParams(BaseModel):
+    edl_path: str = Field(description="Absolute path to edl.json.")
+    output_path: str = Field(description="Output video path (e.g. edit/final.mp4).")
+    preview: Optional[bool] = Field(default=None, description="Preview mode: 1080p, CRF 22, faster encode.")
+    build_subtitles: Optional[bool] = Field(default=None, description="Build master.srt from transcripts + EDL timeline offsets.")
+    no_subtitles: Optional[bool] = Field(default=None, description="Skip subtitles even if the EDL references one.")
+    no_loudnorm: Optional[bool] = Field(default=None, description="Skip audio loudness normalization.")
+
+
+class GradeParams(BaseModel):
+    input_path: str = Field(description="Input video path.")
+    output_path: str = Field(description="Output video path.")
+    preset: Optional[str] = Field(default=None, description="Grade preset: subtle, neutral_punch, warm_cinematic, none.")
+    filter: Optional[str] = Field(default=None, description="Raw ffmpeg filter string (overrides preset).")
+
+
 # ---------------------------------------------------------------------------
 # Session loop
 # ---------------------------------------------------------------------------
@@ -173,11 +226,7 @@ async def run_session(
     enable_shell: bool,
     max_turns: int,
 ) -> None:
-    try:
-        from copilot import CopilotClient, SubprocessConfig, define_tool
-        from copilot.session import PermissionRequestResult
-        from pydantic import BaseModel, Field
-    except ImportError:
+    if CopilotClient is None:
         sys.exit(
             "Required packages not found.\n"
             "Install with:  pip install -e \".[copilot]\"\n"
@@ -189,43 +238,6 @@ async def run_session(
     edit_dir = videos_dir / "edit"
     edit_dir.mkdir(parents=True, exist_ok=True)
 
-    # ------------------------------------------------------------------
-    # Tool parameter models
-    # ------------------------------------------------------------------
-
-    class TranscribeParams(BaseModel):
-        video_path: str = Field(description="Absolute path to the video file.")
-        language: Optional[str] = Field(default=None, description="ISO language code (e.g. 'en'). Omit to auto-detect.")
-        num_speakers: Optional[int] = Field(default=None, description="Number of speakers for diarization.")
-
-    class TranscribeBatchParams(BaseModel):
-        workers: Optional[int] = Field(default=None, description="Parallel workers (default 4).")
-        num_speakers: Optional[int] = Field(default=None, description="Number of speakers (optional).")
-
-    class PackTranscriptsParams(BaseModel):
-        silence_threshold: Optional[float] = Field(default=None, description="Silence gap in seconds that triggers a phrase break (default 0.5).")
-
-    class TimelineViewParams(BaseModel):
-        video_path: str = Field(description="Absolute path to the video file.")
-        start: float = Field(description="Start time in seconds.")
-        end: float = Field(description="End time in seconds.")
-        n_frames: Optional[int] = Field(default=None, description="Number of filmstrip frames to extract (default 8).")
-        transcript_path: Optional[str] = Field(default=None, description="Optional path to transcript JSON for word label overlay.")
-
-    class RenderParams(BaseModel):
-        edl_path: str = Field(description="Absolute path to edl.json.")
-        output_path: str = Field(description="Output video path (e.g. edit/final.mp4).")
-        preview: Optional[bool] = Field(default=None, description="Preview mode: 1080p, CRF 22, faster encode.")
-        build_subtitles: Optional[bool] = Field(default=None, description="Build master.srt from transcripts + EDL timeline offsets.")
-        no_subtitles: Optional[bool] = Field(default=None, description="Skip subtitles even if the EDL references one.")
-        no_loudnorm: Optional[bool] = Field(default=None, description="Skip audio loudness normalization.")
-
-    class GradeParams(BaseModel):
-        input_path: str = Field(description="Input video path.")
-        output_path: str = Field(description="Output video path.")
-        preset: Optional[str] = Field(default=None, description="Grade preset: subtle, neutral_punch, warm_cinematic, none.")
-        filter: Optional[str] = Field(default=None, description="Raw ffmpeg filter string (overrides preset).")
-
     # ------------------------------------------------------------------
     # Tool implementations
     # ------------------------------------------------------------------

From 8b35fa7faf62632c8cd334dfd01dadb6919356f4 Mon Sep 17 00:00:00 2001
From: delgerskhn <delger.bayanmunkh6@gmail.com>
Date: Wed, 22 Apr 2026 20:16:33 +0800
Subject: [PATCH 6/8] refactor: streamline event handling in run_session for
 Copilot SDK integration

---
 orchestrator.py | 50 ++++++++++++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/orchestrator.py b/orchestrator.py
index c01fc05..4aa6d99 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -471,19 +471,11 @@ async def on_user_input_request(request, invocation) -> dict:
     if model:
         session_kwargs["model"] = model
 
-    try:
-        from copilot.generated.session_events import (
-            AssistantMessageData,
-            AssistantMessageDeltaData,
-            SessionIdleData,
-        )
-    except ImportError:
-        # Older SDK versions may use a different import path
-        from copilot.session_events import (  # type: ignore[no-redef]
-            AssistantMessageData,
-            AssistantMessageDeltaData,
-            SessionIdleData,
-        )
+    from copilot.generated.session_events import SessionEventType
+
+    def _event_type(event) -> str:
+        et = getattr(event, "type", "")
+        return str(getattr(et, "value", et))
 
     async with CopilotClient(config) as client:
         async with await client.create_session(**session_kwargs) as session:
@@ -493,11 +485,12 @@ async def on_user_input_request(request, invocation) -> dict:
                 seed_done = asyncio.Event()
 
                 def _on_seed(event):
-                    match event.data:
-                        case AssistantMessageData():
-                            seed_done.set()
-                        case SessionIdleData():
-                            seed_done.set()
+                    et = _event_type(event)
+                    if et in (
+                        SessionEventType.ASSISTANT_MESSAGE.value,
+                        SessionEventType.SESSION_IDLE.value,
+                    ):
+                        seed_done.set()
 
                 unsub_seed = session.on(_on_seed)
                 await session.send(initial_context)
@@ -534,14 +527,19 @@ def _on_seed(event):
                 print("\nAssistant: ", end="", flush=True)
 
                 def on_event(event):
-                    match event.data:
-                        case AssistantMessageDeltaData() as data:
-                            delta = data.delta_content or ""
-                            print(delta, end="", flush=True)
-                        case AssistantMessageData():
-                            print()  # ensure newline after full message
-                        case SessionIdleData():
-                            response_done.set()
+                    et = _event_type(event)
+                    # Support both current "assistant.message_delta" and legacy
+                    # docs/examples that use "assistant.message.delta".
+                    if et in (
+                        SessionEventType.ASSISTANT_MESSAGE_DELTA.value,
+                        "assistant.message.delta",
+                    ):
+                        delta = getattr(event.data, "delta_content", "") or ""
+                        print(delta, end="", flush=True)
+                    elif et == SessionEventType.ASSISTANT_MESSAGE.value:
+                        print()  # ensure newline after full message
+                    elif et == SessionEventType.SESSION_IDLE.value:
+                        response_done.set()
 
                 unsub = session.on(on_event)
                 send_kwargs: dict = {"prompt": user_input}

From 6b395779867e6163ed5c7151bdbde2c6e90537be Mon Sep 17 00:00:00 2001
From: delgerskhn <delger.bayanmunkh6@gmail.com>
Date: Fri, 24 Apr 2026 11:53:18 +0800
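Subject: [PATCH 7/8] Address PR review feedback

- Document GITHUB_TOKEN as a fine-grained token with Copilot Requests
  permission instead of a PAT with the 'copilot' scope.
- Run helpers via asyncio subprocesses so they do not block the event loop.
- Confine render/grade paths and write permissions to edit_dir, resolving
  relative paths against the videos directory.
- Only use the ffmpeg-resized image when the conversion succeeded.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>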
---
 .env.example    |   5 +-
 README.md       |   4 +-
 orchestrator.py | 121 ++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 101 insertions(+), 29 deletions(-)

diff --git a/.env.example b/.env.example
index 9fa52cf..e34aee0 100644
--- a/.env.example
+++ b/.env.example
@@ -6,6 +6,7 @@ ELEVENLABS_API_KEY=
 #   copilot auth login
 #
 # Option B — Personal Access Token:
-#   Create one at https://github.com/settings/tokens with the 'copilot' scope,
-#   then paste it below. The orchestrator loads this file automatically.
+#   Create a fine-grained token at https://github.com/settings/tokens
+#   with Copilot Requests permission, then paste it below.
+#   The orchestrator loads this file automatically.
 GITHUB_TOKEN=
diff --git a/README.md b/README.md
index e89fbe5..1e6add6 100644
--- a/README.md
+++ b/README.md
@@ -72,8 +72,8 @@ copilot auth login             # Option A: browser login (recommended, no token
 cp .env.example .env
 $EDITOR .env
 #   ELEVENLABS_API_KEY=...    ← for transcription (same as before)
-#   GITHUB_TOKEN=...          ← PAT with 'copilot' scope (option B)
-#                               https://github.com/settings/tokens
+#   GITHUB_TOKEN=...          ← fine-grained token with Copilot Requests permission
+#                               https://github.com/settings/tokens (option B)
 ```
 
 Then run the orchestrator against your video folder:
diff --git a/orchestrator.py b/orchestrator.py
index 4aa6d99..075fd10 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -10,7 +10,7 @@
 
 Requirements:
   pip install -e ".[copilot]"          # github-copilot-sdk + pydantic
-  GITHUB_TOKEN=... in .env             # PAT with 'copilot' scope
+  GITHUB_TOKEN=... in .env             # fine-grained token with Copilot Requests permission
     OR  run `copilot auth login` once  # sign in via browser (no token needed)
   ELEVENLABS_API_KEY=... in .env       # for transcription
   ffmpeg and ffprobe on PATH
@@ -103,16 +103,49 @@ def _is_under(path: Path, parent: Path) -> bool:
         return False
 
 
+def _resolve_session_path(raw_path: str, base_dir: Path) -> Path:
+    """Resolve a model-provided path relative to the session videos directory."""
+    path = Path(raw_path)
+    if not path.is_absolute():
+        path = base_dir / path
+    return path.resolve()
+
+
+def _validate_edit_dir_path(
+    raw_path: str,
+    *,
+    videos_dir: Path,
+    edit_dir: Path,
+    label: str,
+    must_exist: bool = False,
+) -> Path:
+    """Resolve a session path and enforce the edit_dir sandbox."""
+    path = _resolve_session_path(raw_path, videos_dir)
+    if not _is_under(path, edit_dir):
+        raise ValueError(f"{label} must stay inside {edit_dir}: {raw_path}")
+    if must_exist and not path.exists():
+        raise ValueError(f"{label} does not exist: {path}")
+    return path
+
+
 # ---------------------------------------------------------------------------
 # Helpers runner
 # ---------------------------------------------------------------------------
 
 
-def _run_helper(args: list[str]) -> tuple[int, str, str]:
-    """Run a Python helper from the helpers/ directory."""
+async def _run_helper(args: list[str]) -> tuple[int, str, str]:
+    """Run a Python helper from the helpers/ directory without blocking the event loop."""
     cmd = [sys.executable] + args
-    proc = subprocess.run(cmd, capture_output=True, text=True)
-    return proc.returncode, proc.stdout, proc.stderr
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout_bytes, stderr_bytes = await proc.communicate()
+    stdout = stdout_bytes.decode(errors="replace") if stdout_bytes is not None else ""
+    stderr = stderr_bytes.decode(errors="replace") if stderr_bytes is not None else ""
+    returncode = proc.returncode if proc.returncode is not None else 1
+    return returncode, stdout, stderr
 
 
 def _format_result(returncode: int, stdout: str, stderr: str) -> str:
@@ -145,18 +178,20 @@ def _prepare_image_attachment(img_path: Path) -> dict:
         with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
             tmp_path = Path(tmp.name)
         try:
-            subprocess.run(
+            result = subprocess.run(
                 [
                     "ffmpeg", "-y", "-i", str(img_path),
                     "-vf", "scale='min(960,iw)':-2",
                     str(tmp_path),
                 ],
-                capture_output=True,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
                 check=False,
             )
-            raw = tmp_path.read_bytes()
-            mime = "image/jpeg"
-        except Exception:
+            if result.returncode == 0 and tmp_path.exists() and tmp_path.stat().st_size > 0:
+                raw = tmp_path.read_bytes()
+                mime = "image/jpeg"
+        except OSError:
             pass
         finally:
             tmp_path.unlink(missing_ok=True)
@@ -257,7 +292,7 @@ async def transcribe(params: TranscribeParams) -> str:
             cmd += ["--language", params.language]
         if params.num_speakers:
             cmd += ["--num-speakers", str(params.num_speakers)]
-        rc, out, err = _run_helper(cmd)
+        rc, out, err = await _run_helper(cmd)
         return _format_result(rc, out, err)
 
     @define_tool(
@@ -275,7 +310,7 @@ async def transcribe_batch(params: TranscribeBatchParams) -> str:
             cmd += ["--workers", str(params.workers)]
         if params.num_speakers:
             cmd += ["--num-speakers", str(params.num_speakers)]
-        rc, out, err = _run_helper(cmd)
+        rc, out, err = await _run_helper(cmd)
         return _format_result(rc, out, err)
 
     @define_tool(
@@ -289,7 +324,7 @@ async def pack_transcripts(params: PackTranscriptsParams) -> str:
         cmd = [str(HELPERS_DIR / "pack_transcripts.py"), "--edit-dir", str(edit_dir)]
         if params.silence_threshold is not None:
             cmd += ["--silence-threshold", str(params.silence_threshold)]
-        rc, out, err = _run_helper(cmd)
+        rc, out, err = await _run_helper(cmd)
         return _format_result(rc, out, err)
 
     # Side-channel for the last timeline image path so it can be attached in the
@@ -320,7 +355,7 @@ async def timeline_view(params: TimelineViewParams) -> str:
             cmd += ["--n-frames", str(params.n_frames)]
         if params.transcript_path:
             cmd += ["--transcript", params.transcript_path]
-        rc, out, err = _run_helper(cmd)
+        rc, out, err = await _run_helper(cmd)
         result = _format_result(rc, out, err)
         if rc == 0 and out_img.exists():
             _pending_images.append(out_img)
@@ -336,10 +371,27 @@ async def timeline_view(params: TimelineViewParams) -> str:
         skip_permission=True,
     )
     async def render(params: RenderParams) -> str:
+        try:
+            edl_path = _validate_edit_dir_path(
+                params.edl_path,
+                videos_dir=videos_dir,
+                edit_dir=edit_dir,
+                label="edl_path",
+                must_exist=True,
+            )
+            output_path = _validate_edit_dir_path(
+                params.output_path,
+                videos_dir=videos_dir,
+                edit_dir=edit_dir,
+                label="output_path",
+            )
+        except ValueError as exc:
+            return f"[invalid input]\n{exc}"
+
         cmd = [
             str(HELPERS_DIR / "render.py"),
-            params.edl_path,
-            "-o", params.output_path,
+            str(edl_path),
+            "-o", str(output_path),
         ]
         if params.preview:
             cmd.append("--preview")
@@ -349,7 +401,7 @@ async def render(params: RenderParams) -> str:
             cmd.append("--no-subtitles")
         if params.no_loudnorm:
             cmd.append("--no-loudnorm")
-        rc, out, err = _run_helper(cmd)
+        rc, out, err = await _run_helper(cmd)
         return _format_result(rc, out, err)
 
     @define_tool(
@@ -361,16 +413,33 @@ async def render(params: RenderParams) -> str:
         skip_permission=True,
     )
     async def grade(params: GradeParams) -> str:
+        try:
+            input_path = _validate_edit_dir_path(
+                params.input_path,
+                videos_dir=videos_dir,
+                edit_dir=edit_dir,
+                label="input_path",
+                must_exist=True,
+            )
+            output_path = _validate_edit_dir_path(
+                params.output_path,
+                videos_dir=videos_dir,
+                edit_dir=edit_dir,
+                label="output_path",
+            )
+        except ValueError as exc:
+            return f"[invalid input]\n{exc}"
+
         cmd = [
             str(HELPERS_DIR / "grade.py"),
-            params.input_path,
-            "-o", params.output_path,
+            str(input_path),
+            "-o", str(output_path),
         ]
         if params.filter:
             cmd += ["--filter", params.filter]
         elif params.preset:
             cmd += ["--preset", params.preset]
-        rc, out, err = _run_helper(cmd)
+        rc, out, err = await _run_helper(cmd)
         return _format_result(rc, out, err)
 
     # ------------------------------------------------------------------
@@ -389,9 +458,11 @@ def on_permission_request(request, invocation) -> "PermissionRequestResult":
 
         if kind == "write":
             file_name = getattr(request, "file_name", None) or ""
-            if file_name and not _is_under(Path(file_name), edit_dir):
-                print(f"\n[write blocked — path outside edit_dir: {file_name}]", flush=True)
-                return PermissionRequestResult(kind="denied-by-rules")
+            if file_name:
+                file_path = _resolve_session_path(file_name, videos_dir)
+                if not _is_under(file_path, edit_dir):
+                    print(f"\n[write blocked — path outside edit_dir: {file_name}]", flush=True)
+                    return PermissionRequestResult(kind="denied-by-rules")
 
         return PermissionRequestResult(kind="approved")
 
@@ -569,7 +640,7 @@ def main() -> None:
         epilog=(
             "Authentication (pick one):\n"
             "  copilot auth login              Sign in via browser — no token needed\n"
-            "  GITHUB_TOKEN=... in .env        PAT with 'copilot' scope\n"
+            "  GITHUB_TOKEN=... in .env        Fine-grained token with Copilot Requests permission\n"
             "    https://github.com/settings/tokens\n"
             "\nModel options (via GitHub Copilot CLI — use /model inside session to switch):\n"
             "  (omit --model)                  Copilot auto-selects the best model\n"
@@ -578,7 +649,7 @@ def main() -> None:
             "  gpt-5                           OpenAI GPT-5\n"
             "  gpt-4.1                         OpenAI GPT-4.1\n"
             "\nEnvironment variables:\n"
-            "  GITHUB_TOKEN        PAT with 'copilot' scope (alternative to browser login)\n"
+            "  GITHUB_TOKEN        Fine-grained token with Copilot Requests permission\n"
             "  ELEVENLABS_API_KEY  ElevenLabs API key for transcription\n"
         ),
     )

From 43fb7f409267a12a7fb40ee5d57f88d73652c559 Mon Sep 17 00:00:00 2001
From: Delgerskhn <delger.bayanmunkh6@gmail.com>
Date: Fri, 24 Apr 2026 16:22:33 +0800
Subject: [PATCH 8/8] Update pyproject.toml

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 211866a..29e6f87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
 
 [project.optional-dependencies]
 animations = ["manim"]
-copilot = ["github-copilot-sdk", "pydantic>=2.0"]
+copilot = ["github-copilot-sdk>=0.1.0; python_version >= '3.11'", "pydantic>=2.0"]
 
 [build-system]
 requires = ["setuptools>=61.0"]