From d3699fe9395c201dc9dcf51eefc623346cab72a7 Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 16 Jan 2026 08:41:09 +0400 Subject: [PATCH 1/2] refactor: fix type errors --- pyproject.toml | 8 +- src/reviewbot/agent/workflow/diff_extract.py | 4 +- src/reviewbot/agent/workflow/ignore.py | 4 +- src/reviewbot/cli/commands.py | 12 +- src/reviewbot/core/reviews/review.py | 2 +- src/reviewbot/infra/embeddings/openai.py | 53 +++--- src/reviewbot/infra/gitlab/diff.py | 164 +++++++++++-------- uv.lock | 2 +- 8 files changed, 134 insertions(+), 115 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aa74f9a..2597934 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "reviewbot" -version = "0.3.0" +version = "0.3.1" description = "Add your description here" readme = "README.md" requires-python = ">=3.13" @@ -54,10 +54,6 @@ quote-style = "double" indent-style = "space" [dependency-groups] -dev = [ - "pyright>=1.1.408", - "ruff>=0.8.6", - "ty>=0.0.4", -] +dev = ["pyright>=1.1.408", "ruff>=0.8.6", "ty>=0.0.4"] [project.optional-dependencies] examples = ["fastapi"] diff --git a/src/reviewbot/agent/workflow/diff_extract.py b/src/reviewbot/agent/workflow/diff_extract.py index 18c9b40..b69df69 100644 --- a/src/reviewbot/agent/workflow/diff_extract.py +++ b/src/reviewbot/agent/workflow/diff_extract.py @@ -2,6 +2,8 @@ import re from typing import Any +type LineNo = int | None + def generate_line_code(file_path: str, old_line: int | None, new_line: int | None) -> str: """ @@ -31,7 +33,7 @@ def create_position_for_issue( in_hunk = False # Track the actual lines found in the diff to build the range - matched_lines = [] # List of (old_line, new_line) + matched_lines: list[tuple[LineNo, LineNo]] = [] # List of (old_line, new_line) for line in lines: match = hunk_header_pattern.match(line) diff --git a/src/reviewbot/agent/workflow/ignore.py b/src/reviewbot/agent/workflow/ignore.py index 833d2f4..94f1e26 100644 --- a/src/reviewbot/agent/workflow/ignore.py +++ b/src/reviewbot/agent/workflow/ignore.py @@ -69,7 +69,7 @@ def parse_reviewignore(repo_path: Path) -> list[str]: List of glob patterns to ignore """ reviewignore_path = repo_path / ".reviewignore" - patterns = [] + patterns: list[str] = [] if not reviewignore_path.exists(): console.print("[dim].reviewignore file not found, using global blacklist only[/dim]") @@ -136,7 +136,7 @@ def filter_diffs(diffs: list[FileDiff], reviewignore_patterns: list[str]) -> lis Returns: Filtered list of diffs """ - filtered = [] + filtered: list[FileDiff] = [] ignored_count = 0 for diff in diffs: diff --git a/src/reviewbot/cli/commands.py b/src/reviewbot/cli/commands.py index 85dd42b..8689e16 100644 --- a/src/reviewbot/cli/commands.py +++ b/src/reviewbot/cli/commands.py @@ -2,7 +2,7 @@ import typer -from reviewbot.agent.workflow import work_agent +from reviewbot.agent.workflow import work_agent # type: ignore from reviewbot.cli.app import app from reviewbot.infra.config.env import load_env from reviewbot.infra.git.repo_tree import tree @@ -22,10 +22,12 @@ def work( mr_iid: str = typer.Argument(..., help="Merge request IID"), ): config = load_env() - work_agent( - config, - project_id, - mr_iid, + work_agent.invoke( # type: ignore + { + "config": config, + "project_id": project_id, + "mr_iid": mr_iid, + }, ) diff --git a/src/reviewbot/core/reviews/review.py b/src/reviewbot/core/reviews/review.py index 03a097f..c675d92 100644 --- a/src/reviewbot/core/reviews/review.py +++ b/src/reviewbot/core/reviews/review.py @@ -9,7 +9,7 @@ class Review: id: UUID = field(default_factory=uuid4) repo: str = "" commit: str = "" - issues: list[Issue] = field(default_factory=list) + issues: list[Issue] = field(default_factory=list[Issue]) summary: str = "" diff --git a/src/reviewbot/infra/embeddings/openai.py b/src/reviewbot/infra/embeddings/openai.py index 6a6a9b1..e80dc89 100644 --- a/src/reviewbot/infra/embeddings/openai.py +++ b/src/reviewbot/infra/embeddings/openai.py @@ -10,6 +10,7 @@ from langchain_core.documents import Document from langchain_openai import OpenAIEmbeddings from langchain_text_splitters import RecursiveCharacterTextSplitter +from pydantic import BaseModel, Field, SecretStr from rich.console import Console os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -53,6 +54,14 @@ } +class SearchResult(BaseModel): + similarity: float = Field(..., description="Cosine similarity score (0 to 1)") + path: str = Field(..., description="Relative path to the source file") + filename: str = Field(..., description="Name of the file") + chunk_index: int = Field(..., description="Index of the chunk within the file") + text: str = Field(..., description="The actual code snippet") + + class CodebaseVectorStore: def __init__( self, @@ -62,7 +71,7 @@ def __init__( vectors_dir: str | Path = "vectors/codebase", embeddings_model: str | None = None, embeddings_base_url: str | None = None, - embeddings_api_key: str | None = None, + embeddings_api_key: SecretStr | None = None, ): self.repo_root = Path(repo_root).resolve() self.vectors_dir = Path(vectors_dir) @@ -73,7 +82,7 @@ def __init__( if embeddings_base_url is None: embeddings_base_url = os.getenv("EMBEDDINGS_BASE_URL") if embeddings_api_key is None: - embeddings_api_key = os.getenv("EMBEDDINGS_API_KEY", "dummy") + embeddings_api_key = SecretStr(os.getenv("EMBEDDINGS_API_KEY", "dummy")) self.embeddings = OpenAIEmbeddings( model=embeddings_model, @@ -82,7 +91,6 @@ def __init__( tiktoken_enabled=False, chunk_size=64, max_retries=3, - request_timeout=120, ) self.splitter = RecursiveCharacterTextSplitter( @@ -149,22 +157,11 @@ def build(self) -> None: raise RuntimeError("No source files found to embed") self.vector_store = FAISS.from_documents(docs, self.embeddings) - self._build_metadata_index() self.save() def compile(self, command: str) -> str: return subprocess.run(command, shell=True, capture_output=True, text=True).stdout - def _build_metadata_index(self) -> None: - self.metadata_index = {} - if not self.vector_store: - return - - for doc_id, doc in self.vector_store.docstore._dict.items(): - path = doc.metadata.get("path") - if path: - self.metadata_index.setdefault(path, []).append(doc_id) - def save(self) -> None: if not self.vector_store: return @@ -195,26 +192,26 @@ def search( *, top_k: int = 10, path: str | None = None, - ) -> list[dict]: + ) -> list[SearchResult]: if not self.vector_store: raise RuntimeError("Vector store not loaded") - filter = {} - if path: - filter["path"] = path - results = self.vector_store.similarity_search_with_score(query, k=top_k, filter=filter) - out = [] + search_filter = {"path": path} if path else None + results = self.vector_store.similarity_search_with_score( # type: ignore + query, k=top_k, filter=search_filter + ) + + out: list[SearchResult] = [] for doc, score in results: out.append( - { - "similarity": float(1 - score), - "path": doc.metadata.get("path"), - "filename": doc.metadata.get("filename"), - "chunk_index": doc.metadata.get("chunk_index"), - "text": doc.page_content, - } + SearchResult( + similarity=float(1 - score), + path=doc.metadata.get("path", ""), # type: ignore + filename=doc.metadata.get("filename", ""), # type: ignore + chunk_index=doc.metadata.get("chunk_index", 0), # type: ignore + text=doc.page_content, + ) ) - console.print(out) return out def read_file( diff --git a/src/reviewbot/infra/gitlab/diff.py b/src/reviewbot/infra/gitlab/diff.py index d53a86d..a35e43a 100644 --- a/src/reviewbot/infra/gitlab/diff.py +++ b/src/reviewbot/infra/gitlab/diff.py @@ -4,7 +4,7 @@ import httpx import requests -from pydantic import BaseModel +from pydantic import BaseModel, Field from rich.console import Console console = Console() @@ -26,11 +26,14 @@ class LineRange(BaseModel): model_config = {"extra": "forbid", "frozen": True} -class DiffPosition(BaseModel): +class DiffRefs(BaseModel): base_sha: str start_sha: str head_sha: str + project_web_url: str | None = None + +class DiffPosition(DiffRefs): position_type: Literal["text", "image", "file"] old_path: str | None = None @@ -68,6 +71,24 @@ class FileDiff(BaseModel): } +class FileChange(BaseModel): + """Represents an individual file change within a diff.""" + + old_path: str | None = Field(None, description="The path of the file before the change") + new_path: str | None = Field(None, description="The path of the file after the change") + diff: str = Field(..., description="The unified diff text") + new_file: bool = Field(False, description="Whether this is a brand new file") + deleted_file: bool = Field(False, description="Whether this file was deleted") + renamed_file: bool = Field(False, description="Whether the file was moved or renamed") + too_large: bool = Field(False, description="Whether the diff is too large") + + +class ChangesResponse(BaseModel): + """The top-level container for the changes API response.""" + + changes: list[FileChange] + + _DIFF_HEADER_RE = re.compile(r"^diff --git a/(.+?) b/(.+?)\s*$") @@ -176,11 +197,11 @@ def fetch_mr_diffs( mr_data = mr_response.json() # Get diff_refs for position objects - diff_refs = mr_data.get("diff_refs") or {} - base_sha = diff_refs.get("base_sha") - head_sha = diff_refs.get("head_sha") - start_sha = diff_refs.get("start_sha") - mr_web_url = mr_data.get("web_url") + diff_refs = mr_data["diff_refs"] + base_sha = diff_refs["base_sha"] + head_sha = diff_refs["head_sha"] + start_sha = diff_refs["start_sha"] + mr_web_url = mr_data["web_url"] if mr_web_url and "/-/merge_requests/" in mr_web_url: diff_refs["project_web_url"] = mr_web_url.split("/-/merge_requests/")[0] @@ -189,20 +210,22 @@ def fetch_mr_diffs( changes_response.raise_for_status() try: - # Try to parse as JSON (new format) - changes_data = changes_response.json() + # Try to parse as JSON + raw_json = changes_response.json() + + changes_data = ChangesResponse.model_validate(raw_json) if isinstance(changes_data, dict) and "changes" in changes_data: # New JSON format with changes array file_diffs: list[FileDiff] = [] - for change in changes_data["changes"]: - change_old_path: str | None = change.get("old_path") - change_new_path: str | None = change.get("new_path") - diff_text: str = change.get("diff", "") - change_is_new_file: bool = change.get("new_file", False) - change_is_deleted_file: bool = change.get("deleted_file", False) - change_is_renamed: bool = change.get("renamed_file", False) + for change in changes_data.changes: + change_old_path: str | None = change.old_path + change_new_path: str | None = change.new_path + diff_text: str = change.diff + change_is_new_file: bool = change.new_file + change_is_deleted_file: bool = change.deleted_file + change_is_renamed: bool = change.renamed_file # Create position object for discussions change_position: dict[str, Any] | None = None @@ -273,7 +296,7 @@ def fetch_mr_diffs( # if line # If diff is empty or too large, try to get it from raw_diffs endpoint - if not diff_text or change.get("too_large", False): + if not diff_text or change.too_large: # Fallback to raw diff endpoint for this file raw_diff_url = f"{mr_url}/diffs" raw_response = requests.get(raw_diff_url, headers=headers, timeout=timeout) @@ -339,7 +362,7 @@ def fetch_mr_diffs( # Create position object for discussions # GitLab requires line_code or line numbers (new_line/old_line) # Extract the first line number from the diff for file-level positioning - raw_position: dict[str, Any] | None = None + raw_position: DiffPosition | None = None if base_sha and head_sha and start_sha: # Try to extract the first line number from the diff extracted_new_line: int | None = None @@ -355,24 +378,24 @@ def fetch_mr_diffs( break # Create position object with line information - raw_position = { - "base_sha": base_sha, - "head_sha": head_sha, - "start_sha": start_sha, - "old_path": parsed_old_path, - "new_path": parsed_new_path, - "position_type": "text", - } + raw_position = DiffPosition( + base_sha=base_sha, + head_sha=head_sha, + start_sha=start_sha, + old_path=parsed_old_path, + new_path=parsed_new_path, + position_type="text", + ) # Add line numbers if we found them if extracted_new_line is not None: - raw_position["new_line"] = extracted_new_line + raw_position.new_line = extracted_new_line if extracted_old_line is not None: - raw_position["old_line"] = extracted_old_line + raw_position.old_line = extracted_old_line # If no lines found, use line 1 as default for file-level discussion if extracted_new_line is None and extracted_old_line is None: - raw_position["new_line"] = 1 + raw_position.new_line = 1 out.append( FileDiff( @@ -409,7 +432,7 @@ async def async_fetch_mr_diffs( mr_iid: str, token: str, timeout: int = 30, -) -> tuple[list[FileDiff], dict[str, str]]: +) -> tuple[list[FileDiff], DiffRefs]: """ Fetch merge request diffs from GitLab API (async version). @@ -429,37 +452,36 @@ async def async_fetch_mr_diffs( mr_data = mr_response.json() # Get diff_refs for position objects - diff_refs = mr_data.get("diff_refs") or {} - base_sha = diff_refs.get("base_sha") - head_sha = diff_refs.get("head_sha") - start_sha = diff_refs.get("start_sha") + diff_refs: DiffRefs = mr_data["diff_refs"] mr_web_url = mr_data.get("web_url") if mr_web_url and "/-/merge_requests/" in mr_web_url: - diff_refs["project_web_url"] = mr_web_url.split("/-/merge_requests/")[0] + diff_refs.project_web_url = mr_web_url.split("/-/merge_requests/")[0] # Try the new JSON changes endpoint first changes_response = await client.get(changes_url, headers=headers, timeout=timeout) changes_response.raise_for_status() try: - # Try to parse as JSON (new format) - changes_data = changes_response.json() + # Try to parse as JSON + raw_json = changes_response.json() + + changes_data = ChangesResponse.model_validate(raw_json) if isinstance(changes_data, dict) and "changes" in changes_data: # New JSON format with changes array file_diffs: list[FileDiff] = [] - for change in changes_data["changes"]: - change_old_path: str | None = change.get("old_path") - change_new_path: str | None = change.get("new_path") - diff_text: str = change.get("diff", "") - change_is_new_file: bool = change.get("new_file", False) - change_is_deleted_file: bool = change.get("deleted_file", False) - change_is_renamed: bool = change.get("renamed_file", False) + for change in changes_data.changes: + change_old_path: str | None = change.old_path + change_new_path: str | None = change.new_path + diff_text: str = change.diff + change_is_new_file: bool = change.new_file + change_is_deleted_file: bool = change.deleted_file + change_is_renamed: bool = change.renamed_file # Create position object for discussions - change_position: dict[str, Any] | None = None - if base_sha and head_sha and start_sha: + change_position: DiffPosition | None = None + if diff_refs.base_sha and diff_refs.head_sha and diff_refs.start_sha: # Parse diff to find first hunk with line range information hunk_header_pattern = re.compile( r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@" @@ -503,27 +525,27 @@ async def async_fetch_mr_diffs( current_new += 1 # Create position object - change_position = { - "base_sha": base_sha, - "head_sha": head_sha, - "start_sha": start_sha, - "old_path": change_old_path, - "new_path": change_new_path, - "position_type": "text", - } + change_position = DiffPosition( + base_sha=diff_refs.base_sha, + head_sha=diff_refs.head_sha, + start_sha=diff_refs.start_sha, + old_path=change_old_path, + new_path=change_new_path, + position_type="text", + ) if change_new_line is not None: - change_position["new_line"] = change_new_line + change_position.new_line = change_new_line if change_old_line is not None: - change_position["old_line"] = change_old_line + change_position.old_line = change_old_line # Default fallback if change_new_line is None and change_old_line is None: - change_position["new_line"] = 1 + change_position.new_line = 1 # If diff is empty or too large, try to get it from raw_diffs endpoint - if not diff_text or change.get("too_large", False): + if not diff_text or change.too_large: # Fallback to raw diff endpoint for this file raw_diff_url = f"{mr_url}/diffs" raw_response = await client.get( @@ -589,8 +611,8 @@ async def async_fetch_mr_diffs( ) = _parse_paths_from_chunk(lines) # Create position object for discussions - raw_position: dict[str, Any] | None = None - if base_sha and head_sha and start_sha: + raw_position: DiffPosition | None = None + if diff_refs.base_sha and diff_refs.head_sha and diff_refs.start_sha: extracted_new_line: int | None = None extracted_old_line: int | None = None @@ -604,24 +626,24 @@ async def async_fetch_mr_diffs( break # Create position object with line information - raw_position = { - "base_sha": base_sha, - "head_sha": head_sha, - "start_sha": start_sha, - "old_path": parsed_old_path, - "new_path": parsed_new_path, - "position_type": "text", - } + raw_position = DiffPosition( + base_sha=diff_refs.base_sha, + head_sha=diff_refs.head_sha, + start_sha=diff_refs.start_sha, + old_path=parsed_old_path, + new_path=parsed_new_path, + position_type="text", + ) # Add line numbers if we found them if extracted_new_line is not None: - raw_position["new_line"] = extracted_new_line + raw_position.new_line = extracted_new_line if extracted_old_line is not None: - raw_position["old_line"] = extracted_old_line + raw_position.old_line = extracted_old_line # If no lines found, use line 1 as default for file-level discussion if extracted_new_line is None and extracted_old_line is None: - raw_position["new_line"] = 1 + raw_position.new_line = 1 out.append( FileDiff( diff --git a/uv.lock b/uv.lock index f656c0b..339a538 100644 --- a/uv.lock +++ b/uv.lock @@ -1460,7 +1460,7 @@ wheels = [ [[package]] name = "reviewbot" -version = "0.3.0" +version = "0.3.1" source = { editable = "." } dependencies = [ { name = "dotenv" }, From 37d990098cc1150f50220268af8c750a228cb52a Mon Sep 17 00:00:00 2001 From: canefe <8518141+canefe@users.noreply.github.com> Date: Fri, 16 Jan 2026 08:44:32 +0400 Subject: [PATCH 2/2] ci: add pyright type check --- .github/workflows/ci.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b50ece..69dea9a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,9 +13,22 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - run: pip install ruff - - run: ruff format --check . - - run: ruff check . + - uses: astral-sh/setup-uv@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install dependencies + run: uv sync + + - name: Format Check (Ruff) + run: uv run ruff format --check . + + - name: Lint Check (Ruff) + run: uv run ruff check . + + - name: Type Check (Pyright) + run: uv run pyright . build: runs-on: ubuntu-latest