Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .github/scripts/__pycache__/multi_agent_review.cpython-313.pyc
Binary file not shown.
107 changes: 74 additions & 33 deletions .github/scripts/multi_agent_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,42 +112,68 @@ def sanitize_jsonish(s: str) -> str:
return s


def extract_first_json_object(s: str) -> Optional[str]:
def strip_json_fence(s: str) -> str:
if not s:
return None
return s

stripped = s.strip()
if not stripped.startswith("```"):
return stripped

# Strip a leading markdown fence if present
if stripped.startswith("```"):
lines = stripped.splitlines()
if lines:
# Check for language identifier (json, jsonl, etc.)
first_line = lines[0].lower()
if "json" in first_line or first_line.strip() == "```":
lines = lines[1:] # drop first fence line
else:
# Not a JSON code block, might be text
pass

if lines and lines[-1].strip().startswith("```"):
lines = lines[:-1]
stripped = "\n".join(lines).strip()
lines = stripped.splitlines()
if not lines:
return stripped

# Only strip fences when the block is generic or json-like.
first_line = lines[0].strip().lower()
if first_line != "```" and "json" not in first_line:
return stripped

body = lines[1:]
if body and body[-1].strip().startswith("```"):
body = body[:-1]
return "\n".join(body).strip()


def extract_first_json_object(s: str) -> Optional[str]:
if not s:
return None

stripped = strip_json_fence(s)

# Try to find JSON object
start = stripped.find("{")
if start == -1:
return None

depth = 0
in_string = False
escaped = False
for i in range(start, len(stripped)):
ch = stripped[i]
if in_string:
if escaped:
escaped = False
continue
if ch == "\\":
escaped = True
continue
if ch == '"':
in_string = False
continue

if ch == '"':
in_string = True
continue
if ch == "{":
depth += 1
elif ch == "}":
continue
if ch == "}":
depth -= 1
if depth == 0:
return stripped[start:i + 1]
if depth < 0:
return None

return None

Expand All @@ -158,18 +184,22 @@ def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:

try:
s = sanitize_jsonish(s)
js = extract_first_json_object(s)
clean = strip_json_fence(s)

# Fast path: whole payload is a JSON object.
try:
parsed = json.loads(clean)
if not isinstance(parsed, dict):
return None, f"JSON is not an object (got {type(parsed).__name__})"
return parsed, None
except json.JSONDecodeError:
pass
except Exception:
pass

js = extract_first_json_object(clean)
if not js:
# Try parsing the whole string as JSON (maybe it's already clean)
try:
parsed = json.loads(s.strip())
if not isinstance(parsed, dict):
return None, f"JSON is not an object (got {type(parsed).__name__})"
return parsed, None
except json.JSONDecodeError:
return None, "No JSON object found in response"
except Exception:
return None, "No JSON object found in response"
return None, "No JSON object found in response"
js = sanitize_jsonish(js)
parsed = json.loads(js)
if not isinstance(parsed, dict):
Expand Down Expand Up @@ -252,15 +282,24 @@ def must_mention_file(line: str, allowed_files: List[str]) -> bool:

def mentions_only_allowed_files(line: str, allowed_files: List[str]) -> bool:
"""
Conservative: if line mentions a path with a known extension,
Conservative: if line mentions a file-like path,
ensure every mentioned path is within allowed_files.
"""
import re
paths = re.findall(r"[\w./-]+\.(?:py|yml|yaml|kt|java|md)", line or "")

paths = re.findall(
r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.[A-Za-z][A-Za-z0-9]{0,7}",
line or "",
)
if not paths:
return True
allowed_set = set(allowed_files)
return all(p in allowed_set for p in paths)

allowed_set = {f.lower() for f in allowed_files}
for path in paths:
normalized = path.strip("`'\"()[]{}<>,:;!?").rstrip(".").lower()
if normalized and normalized not in allowed_set:
return False
return True


def diff_facts(diff: str, allowed_files: List[str], max_lines: int = 60) -> str:
Expand Down Expand Up @@ -324,6 +363,8 @@ def filter_bullets_by_fact_gate(items: List[str], diff_lower: str, allowed_files
continue
if allowed_files and not mentions_only_allowed_files(s, allowed_files):
continue
if STRICT_FACT_GATING and not gate_claims_to_diff(s, diff_lower):
continue
out.append(s)
break

Expand Down
38 changes: 30 additions & 8 deletions .github/workflows/review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ jobs:
run: |
$pr = "${{ github.event.pull_request.number }}"
$comments = gh api repos/${{ github.repository }}/issues/$pr/comments | ConvertFrom-Json
$ai = $comments | Where-Object { $_.body -match "Reviewed-Head-SHA:" } | Select-Object -Last 1
$ai = $comments | Where-Object {
$_.body -match "AI_REVIEW:HEAD_SHA=" -or $_.body -match "Reviewed-Head-SHA:"
} | Select-Object -Last 1
if ($null -ne $ai) {
$m = [regex]::Match($ai.body, "AI_REVIEW:HEAD_SHA=([0-9a-fA-F]{7,40})")
if ($m.Success) {
Expand All @@ -45,19 +47,39 @@ jobs:
run: |
git fetch origin ${{ github.base_ref }} --depth=1

$baseRange = "origin/${{ github.base_ref }}...HEAD"
$diffBuilt = $false

if ($env:PREV_SHA) {
# Validate that PREV_SHA exists in this checkout (force-push can break it)
git cat-file -e "$env:PREV_SHA^{commit}" 2>$null
git cat-file -e "$($env:PREV_SHA)^{commit}" 2>$null
if ($LASTEXITCODE -eq 0) {
git diff --unified=3 $env:PREV_SHA...HEAD > diff.txt
Write-Host "Using incremental diff: $env:PREV_SHA...HEAD"
# Ensure PREV_SHA is still in the current branch history.
git merge-base --is-ancestor "$env:PREV_SHA" HEAD 2>$null
if ($LASTEXITCODE -eq 0) {
$incrementalRange = "$($env:PREV_SHA)...HEAD"
git diff --unified=3 "$incrementalRange" > diff.txt
if ($LASTEXITCODE -eq 0) {
Write-Host "Using incremental diff: $incrementalRange"
$diffBuilt = $true
} else {
Write-Host "Incremental diff command failed. Falling back to full diff."
}
} else {
Write-Host "PREV_SHA is not an ancestor of HEAD. Falling back to full diff."
}
} else {
Write-Host "PREV_SHA not found locally (maybe force-push). Falling back to full diff."
git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
}
} else {
git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
Write-Host "Using full PR diff against base."
}

if (-not $diffBuilt) {
git diff --unified=3 "$baseRange" > diff.txt
if ($LASTEXITCODE -ne 0) {
Write-Error "Failed to build diff for range: $baseRange"
exit 1
}
Write-Host "Using full PR diff against base: $baseRange"
}

Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List
Expand Down