diff --git a/.github/scripts/__pycache__/multi_agent_review.cpython-313.pyc b/.github/scripts/__pycache__/multi_agent_review.cpython-313.pyc index 85aa8fc..f74c117 100644 Binary files a/.github/scripts/__pycache__/multi_agent_review.cpython-313.pyc and b/.github/scripts/__pycache__/multi_agent_review.cpython-313.pyc differ diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py index 62a969b..4546269 100644 --- a/.github/scripts/multi_agent_review.py +++ b/.github/scripts/multi_agent_review.py @@ -112,27 +112,34 @@ def sanitize_jsonish(s: str) -> str: return s -def extract_first_json_object(s: str) -> Optional[str]: +def strip_json_fence(s: str) -> str: if not s: - return None + return s stripped = s.strip() + if not stripped.startswith("```"): + return stripped - # Strip a leading markdown fence if present - if stripped.startswith("```"): - lines = stripped.splitlines() - if lines: - # Check for language identifier (json, jsonl, etc.) - first_line = lines[0].lower() - if "json" in first_line or first_line.strip() == "```": - lines = lines[1:] # drop first fence line - else: - # Not a JSON code block, might be text - pass - - if lines and lines[-1].strip().startswith("```"): - lines = lines[:-1] - stripped = "\n".join(lines).strip() + lines = stripped.splitlines() + if not lines: + return stripped + + # Only strip fences when the block is generic or json-like. + first_line = lines[0].strip().lower() + if first_line != "```" and "json" not in first_line: + return stripped + + body = lines[1:] + if body and body[-1].strip().startswith("```"): + body = body[:-1] + return "\n".join(body).strip() + + +def extract_first_json_object(s: str) -> Optional[str]: + if not s: + return None + + stripped = strip_json_fence(s) # Try to find JSON object start = stripped.find("{") @@ -140,14 +147,33 @@ def extract_first_json_object(s: str) -> Optional[str]: return None depth = 0 + in_string = False + escaped = False for i in range(start, len(stripped)): ch = stripped[i] + if in_string: + if escaped: + escaped = False + continue + if ch == "\\": + escaped = True + continue + if ch == '"': + in_string = False + continue + + if ch == '"': + in_string = True + continue if ch == "{": depth += 1 - elif ch == "}": + continue + if ch == "}": depth -= 1 if depth == 0: return stripped[start:i + 1] + if depth < 0: + return None return None @@ -158,18 +184,22 @@ def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]: try: s = sanitize_jsonish(s) - js = extract_first_json_object(s) + clean = strip_json_fence(s) + + # Fast path: whole payload is a JSON object. + try: + parsed = json.loads(clean) + if not isinstance(parsed, dict): + return None, f"JSON is not an object (got {type(parsed).__name__})" + return parsed, None + except json.JSONDecodeError: + pass + except Exception: + pass + + js = extract_first_json_object(clean) if not js: - # Try parsing the whole string as JSON (maybe it's already clean) - try: - parsed = json.loads(s.strip()) - if not isinstance(parsed, dict): - return None, f"JSON is not an object (got {type(parsed).__name__})" - return parsed, None - except json.JSONDecodeError: - return None, "No JSON object found in response" - except Exception: - return None, "No JSON object found in response" + return None, "No JSON object found in response" js = sanitize_jsonish(js) parsed = json.loads(js) if not isinstance(parsed, dict): @@ -252,15 +282,24 @@ def must_mention_file(line: str, allowed_files: List[str]) -> bool: def mentions_only_allowed_files(line: str, allowed_files: List[str]) -> bool: """ - Conservative: if line mentions a path with a known extension, + Conservative: if line mentions a file-like path, ensure every mentioned path is within allowed_files. """ import re - paths = re.findall(r"[\w./-]+\.(?:py|yml|yaml|kt|java|md)", line or "") + + paths = re.findall( + r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.[A-Za-z][A-Za-z0-9]{0,7}", + line or "", + ) if not paths: return True - allowed_set = set(allowed_files) - return all(p in allowed_set for p in paths) + + allowed_set = {f.lower() for f in allowed_files} + for path in paths: + normalized = path.strip("`'\"()[]{}<>,:;!?").rstrip(".").lower() + if normalized and normalized not in allowed_set: + return False + return True def diff_facts(diff: str, allowed_files: List[str], max_lines: int = 60) -> str: @@ -324,6 +363,8 @@ def filter_bullets_by_fact_gate(items: List[str], diff_lower: str, allowed_files continue if allowed_files and not mentions_only_allowed_files(s, allowed_files): continue + if STRICT_FACT_GATING and not gate_claims_to_diff(s, diff_lower): + continue out.append(s) break diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml index 92103af..c022e68 100644 --- a/.github/workflows/review.yml +++ b/.github/workflows/review.yml @@ -29,7 +29,9 @@ jobs: run: | $pr = "${{ github.event.pull_request.number }}" $comments = gh api repos/${{ github.repository }}/issues/$pr/comments | ConvertFrom-Json - $ai = $comments | Where-Object { $_.body -match "Reviewed-Head-SHA:" } | Select-Object -Last 1 + $ai = $comments | Where-Object { + $_.body -match "AI_REVIEW:HEAD_SHA=" -or $_.body -match "Reviewed-Head-SHA:" + } | Select-Object -Last 1 if ($null -ne $ai) { $m = [regex]::Match($ai.body, "AI_REVIEW:HEAD_SHA=([0-9a-fA-F]{7,40})") if ($m.Success) { @@ -45,19 +47,39 @@ jobs: run: | git fetch origin ${{ github.base_ref }} --depth=1 + $baseRange = "origin/${{ github.base_ref }}...HEAD" + $diffBuilt = $false + if ($env:PREV_SHA) { # Validate that PREV_SHA exists in this checkout (force-push can break it) - git cat-file -e "$env:PREV_SHA^{commit}" 2>$null + git cat-file -e "$($env:PREV_SHA)^{commit}" 2>$null if ($LASTEXITCODE -eq 0) { - git diff --unified=3 $env:PREV_SHA...HEAD > diff.txt - Write-Host "Using incremental diff: $env:PREV_SHA...HEAD" + # Ensure PREV_SHA is still in the current branch history. + git merge-base --is-ancestor "$env:PREV_SHA" HEAD 2>$null + if ($LASTEXITCODE -eq 0) { + $incrementalRange = "$($env:PREV_SHA)...HEAD" + git diff --unified=3 "$incrementalRange" > diff.txt + if ($LASTEXITCODE -eq 0) { + Write-Host "Using incremental diff: $incrementalRange" + $diffBuilt = $true + } else { + Write-Host "Incremental diff command failed. Falling back to full diff." + } + } else { + Write-Host "PREV_SHA is not an ancestor of HEAD. Falling back to full diff." + } } else { Write-Host "PREV_SHA not found locally (maybe force-push). Falling back to full diff." - git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt } - } else { - git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt - Write-Host "Using full PR diff against base." + } + + if (-not $diffBuilt) { + git diff --unified=3 "$baseRange" > diff.txt + if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to build diff for range: $baseRange" + exit 1 + } + Write-Host "Using full PR diff against base: $baseRange" } Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List