From d3699fe9395c201dc9dcf51eefc623346cab72a7 Mon Sep 17 00:00:00 2001
From: canefe <8518141+canefe@users.noreply.github.com>
Date: Fri, 16 Jan 2026 08:41:09 +0400
Subject: [PATCH 1/2] refactor: fix type errors

---
 pyproject.toml                               |   8 +-
 src/reviewbot/agent/workflow/diff_extract.py |   4 +-
 src/reviewbot/agent/workflow/ignore.py       |   4 +-
 src/reviewbot/cli/commands.py                |  12 +-
 src/reviewbot/core/reviews/review.py         |   2 +-
 src/reviewbot/infra/embeddings/openai.py     |  53 +++---
 src/reviewbot/infra/gitlab/diff.py           | 164 +++++++++++--------
 uv.lock                                      |   2 +-
 8 files changed, 134 insertions(+), 115 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index aa74f9a..2597934 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "reviewbot"
-version = "0.3.0"
+version = "0.3.1"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
@@ -54,10 +54,6 @@ quote-style = "double"
 indent-style = "space"
 
 [dependency-groups]
-dev = [
-    "pyright>=1.1.408",
-    "ruff>=0.8.6",
-    "ty>=0.0.4",
-]
+dev = ["pyright>=1.1.408", "ruff>=0.8.6", "ty>=0.0.4"]
 [project.optional-dependencies]
 examples = ["fastapi"]
diff --git a/src/reviewbot/agent/workflow/diff_extract.py b/src/reviewbot/agent/workflow/diff_extract.py
index 18c9b40..b69df69 100644
--- a/src/reviewbot/agent/workflow/diff_extract.py
+++ b/src/reviewbot/agent/workflow/diff_extract.py
@@ -2,6 +2,8 @@
 import re
 from typing import Any
 
+type LineNo = int | None
+
 
 def generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str:
     """
@@ -31,7 +33,7 @@ def create_position_for_issue(
     in_hunk = False
 
     # Track the actual lines found in the diff to build the range
-    matched_lines = []  # List of (old_line, new_line)
+    matched_lines: list[tuple[LineNo, LineNo]] = []  # List of (old_line, new_line)
 
     for line in lines:
         match = hunk_header_pattern.match(line)
diff --git a/src/reviewbot/agent/workflow/ignore.py b/src/reviewbot/agent/workflow/ignore.py
index 833d2f4..94f1e26 100644
--- a/src/reviewbot/agent/workflow/ignore.py
+++ b/src/reviewbot/agent/workflow/ignore.py
@@ -69,7 +69,7 @@ def parse_reviewignore(repo_path: Path) -> list[str]:
         List of glob patterns to ignore
     """
     reviewignore_path = repo_path / ".reviewignore"
-    patterns = []
+    patterns: list[str] = []
 
     if not reviewignore_path.exists():
         console.print("[dim].reviewignore file not found, using global blacklist only[/dim]")
@@ -136,7 +136,7 @@ def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> lis
     Returns:
         Filtered list of diffs
     """
-    filtered = []
+    filtered: list[FileDiff] = []
     ignored_count = 0
 
     for diff in diffs:
diff --git a/src/reviewbot/cli/commands.py b/src/reviewbot/cli/commands.py
index 85dd42b..8689e16 100644
--- a/src/reviewbot/cli/commands.py
+++ b/src/reviewbot/cli/commands.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from reviewbot.agent.workflow import work_agent
+from reviewbot.agent.workflow import work_agent  # type: ignore
 from reviewbot.cli.app import app
 from reviewbot.infra.config.env import load_env
 from reviewbot.infra.git.repo_tree import tree
@@ -22,10 +22,12 @@ def work(
     mr_iid: str = typer.Argument(..., help="Merge request IID"),
 ):
     config = load_env()
-    work_agent(
-        config,
-        project_id,
-        mr_iid,
+    work_agent.invoke(  # type: ignore
+        {
+            "config": config,
+            "project_id": project_id,
+            "mr_iid": mr_iid,
+        },
     )
 
 
diff --git a/src/reviewbot/core/reviews/review.py b/src/reviewbot/core/reviews/review.py
index 03a097f..c675d92 100644
--- a/src/reviewbot/core/reviews/review.py
+++ b/src/reviewbot/core/reviews/review.py
@@ -9,7 +9,7 @@ class Review:
     id: UUID = field(default_factory=uuid4)
     repo: str = ""
     commit: str = ""
-    issues: list[Issue] = field(default_factory=list)
+    issues: list[Issue] = field(default_factory=list[Issue])
     summary: str = ""
 
 
diff --git a/src/reviewbot/infra/embeddings/openai.py b/src/reviewbot/infra/embeddings/openai.py
index 6a6a9b1..e80dc89 100644
--- a/src/reviewbot/infra/embeddings/openai.py
+++ b/src/reviewbot/infra/embeddings/openai.py
@@ -10,6 +10,7 @@
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+from pydantic import BaseModel, Field, SecretStr
 from rich.console import Console
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -53,6 +54,14 @@
 }
 
 
+class SearchResult(BaseModel):
+    similarity: float = Field(..., description="Cosine similarity score (0 to 1)")
+    path: str = Field(..., description="Relative path to the source file")
+    filename: str = Field(..., description="Name of the file")
+    chunk_index: int = Field(..., description="Index of the chunk within the file")
+    text: str = Field(..., description="The actual code snippet")
+
+
 class CodebaseVectorStore:
     def __init__(
         self,
@@ -62,7 +71,7 @@ def __init__(
         vectors_dir: str | Path = "vectors/codebase",
         embeddings_model: str | None = None,
         embeddings_base_url: str | None = None,
-        embeddings_api_key: str | None = None,
+        embeddings_api_key: SecretStr | None = None,
     ):
         self.repo_root = Path(repo_root).resolve()
         self.vectors_dir = Path(vectors_dir)
@@ -73,7 +82,7 @@ def __init__(
         if embeddings_base_url is None:
             embeddings_base_url = os.getenv("EMBEDDINGS_BASE_URL")
         if embeddings_api_key is None:
-            embeddings_api_key = os.getenv("EMBEDDINGS_API_KEY", "dummy")
+            embeddings_api_key = SecretStr(os.getenv("EMBEDDINGS_API_KEY", "dummy"))
 
         self.embeddings = OpenAIEmbeddings(
             model=embeddings_model,
@@ -82,7 +91,6 @@ def __init__(
             tiktoken_enabled=False,
             chunk_size=64,
             max_retries=3,
-            request_timeout=120,
         )
 
         self.splitter = RecursiveCharacterTextSplitter(
@@ -149,22 +157,11 @@ def build(self) -> None:
             raise RuntimeError("No source files found to embed")
 
         self.vector_store = FAISS.from_documents(docs, self.embeddings)
-        self._build_metadata_index()
         self.save()
 
     def compile(self, command: str) -> str:
         return subprocess.run(command, shell=True, capture_output=True, text=True).stdout
 
-    def _build_metadata_index(self) -> None:
-        self.metadata_index = {}
-        if not self.vector_store:
-            return
-
-        for doc_id, doc in self.vector_store.docstore._dict.items():
-            path = doc.metadata.get("path")
-            if path:
-                self.metadata_index.setdefault(path, []).append(doc_id)
-
     def save(self) -> None:
         if not self.vector_store:
             return
@@ -195,26 +192,26 @@ def search(
         *,
         top_k: int = 10,
         path: str | None = None,
-    ) -> list[dict]:
+    ) -> list[SearchResult]:
         if not self.vector_store:
             raise RuntimeError("Vector store not loaded")
 
-        filter = {}
-        if path:
-            filter["path"] = path
-        results = self.vector_store.similarity_search_with_score(query, k=top_k, filter=filter)
-        out = []
+        search_filter = {"path": path} if path else None
+        results = self.vector_store.similarity_search_with_score(  # type: ignore
+            query, k=top_k, filter=search_filter
+        )
+
+        out: list[SearchResult] = []
         for doc, score in results:
             out.append(
-                {
-                    "similarity": float(1 - score),
-                    "path": doc.metadata.get("path"),
-                    "filename": doc.metadata.get("filename"),
-                    "chunk_index": doc.metadata.get("chunk_index"),
-                    "text": doc.page_content,
-                }
+                SearchResult(
+                    similarity=float(1 - score),
+                    path=doc.metadata.get("path", ""),  # type: ignore
+                    filename=doc.metadata.get("filename", ""),  # type: ignore
+                    chunk_index=doc.metadata.get("chunk_index", 0),  # type: ignore
+                    text=doc.page_content,
+                )
             )
-        console.print(out)
         return out
 
     def read_file(
diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py
index d53a86d..a35e43a 100644
--- a/src/reviewbot/infra/gitlab/diff.py
+++ b/src/reviewbot/infra/gitlab/diff.py
@@ -4,7 +4,7 @@
 
 import httpx
 import requests
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from rich.console import Console
 
 console = Console()
@@ -26,11 +26,14 @@ class LineRange(BaseModel):
     model_config = {"extra": "forbid", "frozen": True}
 
 
-class DiffPosition(BaseModel):
+class DiffRefs(BaseModel):
     base_sha: str
     start_sha: str
     head_sha: str
+    project_web_url: str | None = None
 
+
+class DiffPosition(DiffRefs):
     position_type: Literal["text", "image", "file"]
 
     old_path: str | None = None
@@ -68,6 +71,24 @@ class FileDiff(BaseModel):
     }
 
 
+class FileChange(BaseModel):
+    """Represents an individual file change within a diff."""
+
+    old_path: str | None = Field(None, description="The path of the file before the change")
+    new_path: str | None = Field(None, description="The path of the file after the change")
+    diff: str = Field(..., description="The unified diff text")
+    new_file: bool = Field(False, description="Whether this is a brand new file")
+    deleted_file: bool = Field(False, description="Whether this file was deleted")
+    renamed_file: bool = Field(False, description="Whether the file was moved or renamed")
+    too_large: bool = Field(False, description="Whether the diff is too large")
+
+
+class ChangesResponse(BaseModel):
+    """The top-level container for the changes API response."""
+
+    changes: list[FileChange]
+
+
 _DIFF_HEADER_RE = re.compile(r"^diff --git a/(.+?) b/(.+?)\s*$")
 
 
@@ -176,11 +197,11 @@ def fetch_mr_diffs(
     mr_data = mr_response.json()
 
     # Get diff_refs for position objects
-    diff_refs = mr_data.get("diff_refs") or {}
-    base_sha = diff_refs.get("base_sha")
-    head_sha = diff_refs.get("head_sha")
-    start_sha = diff_refs.get("start_sha")
-    mr_web_url = mr_data.get("web_url")
+    diff_refs = mr_data["diff_refs"]
+    base_sha = diff_refs["base_sha"]
+    head_sha = diff_refs["head_sha"]
+    start_sha = diff_refs["start_sha"]
+    mr_web_url = mr_data["web_url"]
     if mr_web_url and "/-/merge_requests/" in mr_web_url:
         diff_refs["project_web_url"] = mr_web_url.split("/-/merge_requests/")[0]
 
@@ -189,20 +210,22 @@ def fetch_mr_diffs(
     changes_response.raise_for_status()
 
     try:
-        # Try to parse as JSON (new format)
-        changes_data = changes_response.json()
+        # Try to parse as JSON
+        raw_json = changes_response.json()
+
+        changes_data = ChangesResponse.model_validate(raw_json)
 
         if isinstance(changes_data, dict) and "changes" in changes_data:
             # New JSON format with changes array
             file_diffs: list[FileDiff] = []
 
-            for change in changes_data["changes"]:
-                change_old_path: str | None = change.get("old_path")
-                change_new_path: str | None = change.get("new_path")
-                diff_text: str = change.get("diff", "")
-                change_is_new_file: bool = change.get("new_file", False)
-                change_is_deleted_file: bool = change.get("deleted_file", False)
-                change_is_renamed: bool = change.get("renamed_file", False)
+            for change in changes_data.changes:
+                change_old_path: str | None = change.old_path
+                change_new_path: str | None = change.new_path
+                diff_text: str = change.diff
+                change_is_new_file: bool = change.new_file
+                change_is_deleted_file: bool = change.deleted_file
+                change_is_renamed: bool = change.renamed_file
 
                 # Create position object for discussions
                 change_position: dict[str, Any] | None = None
@@ -273,7 +296,7 @@ def fetch_mr_diffs(
                     # if line
 
                 # If diff is empty or too large, try to get it from raw_diffs endpoint
-                if not diff_text or change.get("too_large", False):
+                if not diff_text or change.too_large:
                     # Fallback to raw diff endpoint for this file
                     raw_diff_url = f"{mr_url}/diffs"
                     raw_response = requests.get(raw_diff_url, headers=headers, timeout=timeout)
@@ -339,7 +362,7 @@ def fetch_mr_diffs(
         # Create position object for discussions
         # GitLab requires line_code or line numbers (new_line/old_line)
         # Extract the first line number from the diff for file-level positioning
-        raw_position: dict[str, Any] | None = None
+        raw_position: DiffPosition | None = None
         if base_sha and head_sha and start_sha:
             # Try to extract the first line number from the diff
             extracted_new_line: int | None = None
@@ -355,24 +378,24 @@ def fetch_mr_diffs(
                     break
 
             # Create position object with line information
-            raw_position = {
-                "base_sha": base_sha,
-                "head_sha": head_sha,
-                "start_sha": start_sha,
-                "old_path": parsed_old_path,
-                "new_path": parsed_new_path,
-                "position_type": "text",
-            }
+            raw_position = DiffPosition(
+                base_sha=base_sha,
+                head_sha=head_sha,
+                start_sha=start_sha,
+                old_path=parsed_old_path,
+                new_path=parsed_new_path,
+                position_type="text",
+            )
 
             # Add line numbers if we found them
             if extracted_new_line is not None:
-                raw_position["new_line"] = extracted_new_line
+                raw_position.new_line = extracted_new_line
             if extracted_old_line is not None:
-                raw_position["old_line"] = extracted_old_line
+                raw_position.old_line = extracted_old_line
 
             # If no lines found, use line 1 as default for file-level discussion
             if extracted_new_line is None and extracted_old_line is None:
-                raw_position["new_line"] = 1
+                raw_position.new_line = 1
 
         out.append(
             FileDiff(
@@ -409,7 +432,7 @@ async def async_fetch_mr_diffs(
     mr_iid: str,
     token: str,
     timeout: int = 30,
-) -> tuple[list[FileDiff], dict[str, str]]:
+) -> tuple[list[FileDiff], DiffRefs]:
     """
     Fetch merge request diffs from GitLab API (async version).
 
@@ -429,37 +452,36 @@ async def async_fetch_mr_diffs(
         mr_data = mr_response.json()
 
         # Get diff_refs for position objects
-        diff_refs = mr_data.get("diff_refs") or {}
-        base_sha = diff_refs.get("base_sha")
-        head_sha = diff_refs.get("head_sha")
-        start_sha = diff_refs.get("start_sha")
+        diff_refs: DiffRefs = mr_data["diff_refs"]
         mr_web_url = mr_data.get("web_url")
         if mr_web_url and "/-/merge_requests/" in mr_web_url:
-            diff_refs["project_web_url"] = mr_web_url.split("/-/merge_requests/")[0]
+            diff_refs.project_web_url = mr_web_url.split("/-/merge_requests/")[0]
 
         # Try the new JSON changes endpoint first
         changes_response = await client.get(changes_url, headers=headers, timeout=timeout)
         changes_response.raise_for_status()
 
         try:
-            # Try to parse as JSON (new format)
-            changes_data = changes_response.json()
+            # Try to parse as JSON
+            raw_json = changes_response.json()
+
+            changes_data = ChangesResponse.model_validate(raw_json)
 
             if isinstance(changes_data, dict) and "changes" in changes_data:
                 # New JSON format with changes array
                 file_diffs: list[FileDiff] = []
 
-                for change in changes_data["changes"]:
-                    change_old_path: str | None = change.get("old_path")
-                    change_new_path: str | None = change.get("new_path")
-                    diff_text: str = change.get("diff", "")
-                    change_is_new_file: bool = change.get("new_file", False)
-                    change_is_deleted_file: bool = change.get("deleted_file", False)
-                    change_is_renamed: bool = change.get("renamed_file", False)
+                for change in changes_data.changes:
+                    change_old_path: str | None = change.old_path
+                    change_new_path: str | None = change.new_path
+                    diff_text: str = change.diff
+                    change_is_new_file: bool = change.new_file
+                    change_is_deleted_file: bool = change.deleted_file
+                    change_is_renamed: bool = change.renamed_file
 
                     # Create position object for discussions
-                    change_position: dict[str, Any] | None = None
-                    if base_sha and head_sha and start_sha:
+                    change_position: DiffPosition | None = None
+                    if diff_refs.base_sha and diff_refs.head_sha and diff_refs.start_sha:
                         # Parse diff to find first hunk with line range information
                         hunk_header_pattern = re.compile(
                             r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@"
@@ -503,27 +525,27 @@ async def async_fetch_mr_diffs(
                                 current_new += 1
 
                         # Create position object
-                        change_position = {
-                            "base_sha": base_sha,
-                            "head_sha": head_sha,
-                            "start_sha": start_sha,
-                            "old_path": change_old_path,
-                            "new_path": change_new_path,
-                            "position_type": "text",
-                        }
+                        change_position = DiffPosition(
+                            base_sha=diff_refs.base_sha,
+                            head_sha=diff_refs.head_sha,
+                            start_sha=diff_refs.start_sha,
+                            old_path=change_old_path,
+                            new_path=change_new_path,
+                            position_type="text",
+                        )
 
                         if change_new_line is not None:
-                            change_position["new_line"] = change_new_line
+                            change_position.new_line = change_new_line
 
                         if change_old_line is not None:
-                            change_position["old_line"] = change_old_line
+                            change_position.old_line = change_old_line
 
                         # Default fallback
                         if change_new_line is None and change_old_line is None:
-                            change_position["new_line"] = 1
+                            change_position.new_line = 1
 
                     # If diff is empty or too large, try to get it from raw_diffs endpoint
-                    if not diff_text or change.get("too_large", False):
+                    if not diff_text or change.too_large:
                         # Fallback to raw diff endpoint for this file
                         raw_diff_url = f"{mr_url}/diffs"
                         raw_response = await client.get(
@@ -589,8 +611,8 @@ async def async_fetch_mr_diffs(
         ) = _parse_paths_from_chunk(lines)
 
         # Create position object for discussions
-        raw_position: dict[str, Any] | None = None
-        if base_sha and head_sha and start_sha:
+        raw_position: DiffPosition | None = None
+        if diff_refs.base_sha and diff_refs.head_sha and diff_refs.start_sha:
             extracted_new_line: int | None = None
             extracted_old_line: int | None = None
 
@@ -604,24 +626,24 @@ async def async_fetch_mr_diffs(
                     break
 
             # Create position object with line information
-            raw_position = {
-                "base_sha": base_sha,
-                "head_sha": head_sha,
-                "start_sha": start_sha,
-                "old_path": parsed_old_path,
-                "new_path": parsed_new_path,
-                "position_type": "text",
-            }
+            raw_position = DiffPosition(
+                base_sha=diff_refs.base_sha,
+                head_sha=diff_refs.head_sha,
+                start_sha=diff_refs.start_sha,
+                old_path=parsed_old_path,
+                new_path=parsed_new_path,
+                position_type="text",
+            )
 
             # Add line numbers if we found them
             if extracted_new_line is not None:
-                raw_position["new_line"] = extracted_new_line
+                raw_position.new_line = extracted_new_line
             if extracted_old_line is not None:
-                raw_position["old_line"] = extracted_old_line
+                raw_position.old_line = extracted_old_line
 
             # If no lines found, use line 1 as default for file-level discussion
             if extracted_new_line is None and extracted_old_line is None:
-                raw_position["new_line"] = 1
+                raw_position.new_line = 1
 
         out.append(
             FileDiff(
diff --git a/uv.lock b/uv.lock
index f656c0b..339a538 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1460,7 +1460,7 @@ wheels = [
 
 [[package]]
 name = "reviewbot"
-version = "0.3.0"
+version = "0.3.1"
 source = { editable = "." }
 dependencies = [
     { name = "dotenv" },

From 37d990098cc1150f50220268af8c750a228cb52a Mon Sep 17 00:00:00 2001
From: canefe <8518141+canefe@users.noreply.github.com>
Date: Fri, 16 Jan 2026 08:44:32 +0400
Subject: [PATCH 2/2] ci: add pyright type check

---
 .github/workflows/ci.yml | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9b50ece..69dea9a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,9 +13,22 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - run: pip install ruff
-      - run: ruff format --check .
-      - run: ruff check .
+      - uses: astral-sh/setup-uv@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Format Check (Ruff)
+        run: uv run ruff format --check .
+
+      - name: Lint Check (Ruff)
+        run: uv run ruff check .
+
+      - name: Type Check (Pyright)
+        run: uv run pyright .
 
   build:
     runs-on: ubuntu-latest