Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions backend/app/api/routes_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,27 @@
from backend.app.services.evaluation_service import evaluate
from __future__ import annotations

from backend.app.schemas.evaluation import EvaluationRunRequest
from backend.app.services.evaluation_service import evaluate, evaluation_errors, evaluation_summary
from fastapi import APIRouter

router = APIRouter(prefix="/evaluation", tags=["evaluation"])


@router.post("/run")
def run_evaluation(payload: EvaluationRunRequest) -> dict[str, object]:
    """Run an evaluation for the benchmark named in the request body.

    Forwards ``payload.benchmark_path`` to the evaluation service and returns
    the mapping it produces.
    """
    requested_benchmark = payload.benchmark_path
    return evaluate(requested_benchmark)


@router.get("/run")
def run_evaluation_legacy() -> dict[str, object]:
    """Run an evaluation via GET, using the service's default benchmark.

    Named differently from the POST handler so the two module-level functions
    do not shadow each other.

    Returns:
        The mapping produced by ``evaluate()`` called without arguments.
    """
    return evaluate()


@router.get("/summary")
def summary() -> dict[str, object]:
    """Return the condensed evaluation summary from the evaluation service."""
    overview = evaluation_summary()
    return overview


@router.get("/errors")
def errors() -> dict[str, object]:
    """Return the error breakdown reported by the evaluation service."""
    breakdown = evaluation_errors()
    return breakdown
42 changes: 41 additions & 1 deletion backend/app/api/routes_reports.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,48 @@
from backend.app.services.report_service import demo_report
from __future__ import annotations

from backend.app.schemas.reports import ReportFromAnalysisRequest
from backend.app.services.report_service import (
create_report_from_analysis,
demo_report,
get_report,
get_report_content,
list_reports,
)
from fastapi import APIRouter, Response

router = APIRouter(prefix="/reports", tags=["reports"])


@router.post("/from-analysis")
def from_analysis(payload: ReportFromAnalysisRequest) -> dict[str, object]:
    """Create a report from the submitted analysis and return it."""
    return create_report_from_analysis(
        analysis=payload.analysis,
        title=payload.title,
        formats=payload.formats,
    )


@router.get("")
def reports() -> list[dict[str, object]]:
    """List the report index entries (route without trailing slash)."""
    index_entries = list_reports()
    return index_entries


@router.get("/")
def reports_slash() -> list[dict[str, object]]:
    """List the report index entries (route with trailing slash)."""
    index_entries = list_reports()
    return index_entries


@router.get("/demo.md")
def report_demo() -> Response:
    """Serve the demo report as Markdown."""
    # Declared ahead of "/{report_id}": routes are matched in declaration
    # order, so the parameterised route would otherwise capture "demo.md" as a
    # report id and this endpoint would never be reached.
    return Response(content=demo_report(), media_type="text/markdown")


@router.get("/{report_id}")
def report_detail(report_id: str) -> dict[str, object]:
    """Return the stored report for ``report_id``.

    Raises:
        HTTPException: 404 with detail ``"not found"`` when the service has no
            (or an empty) report for that id, matching the 404 returned by the
            download route.
    """
    # Local import: HTTPException is not in this module's import list.
    from fastapi import HTTPException

    report = get_report(report_id)
    if not report:
        raise HTTPException(status_code=404, detail="not found")
    return report


@router.get("/{report_id}/download")
def report_download(report_id: str, format: str = "markdown") -> Response:
    """Return one rendered format of a report as a file attachment.

    Args:
        report_id: Identifier of the stored report.
        format: Rendering to download. The name shadows the builtin but is the
            public query-parameter name, so it is kept.

    Returns:
        The rendered body with a ``content-disposition`` attachment header, or
        a plain-text 404 response when the report or format is unavailable.
    """
    content = get_report_content(report_id, format)
    if content is None:
        return Response(content="Report or format not found", media_type="text/plain", status_code=404)
    body, media_type, filename = content
    disposition = f'attachment; filename="{filename}"'
    return Response(content=body, media_type=media_type, headers={"content-disposition": disposition})
26 changes: 25 additions & 1 deletion backend/app/api/routes_review.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,34 @@
from __future__ import annotations

from argument_risk_engine.review.models import ReviewFeedback

from backend.app.services.review_service import record_feedback
from backend.app.schemas.review import ReviewItemRequest
from backend.app.services.review_service import (
create_review_item,
get_review_summary,
list_review_items,
record_feedback,
)
from fastapi import APIRouter

router = APIRouter(prefix="/review", tags=["review"])


@router.get("/items")
def items() -> list[dict[str, object]]:
    """Return every stored review item."""
    stored_items = list_review_items()
    return stored_items


@router.post("/items")
def create_item(payload: ReviewItemRequest) -> dict[str, object]:
    """Store a new review item built from the request body and return it."""
    created = create_review_item(payload)
    return created


@router.get("/summary")
def summary() -> dict[str, object]:
    """Return the review summary computed by the review service."""
    review_overview = get_review_summary()
    return review_overview


@router.post("/feedback")
def feedback(payload: ReviewFeedback) -> dict[str, str]:
    """Record reviewer feedback and return the service's acknowledgement."""
    acknowledgement = record_feedback(payload)
    return acknowledgement
3 changes: 3 additions & 0 deletions backend/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
)

# Routers mounted at the application root (no extra path prefix).
app.include_router(routes_analysis.router)
# NOTE(review): in the visible lines review/evaluation/reports are mounted only
# at the root, not under "/api" — confirm this is intended.
app.include_router(routes_review.router)
app.include_router(routes_evaluation.router)
app.include_router(routes_reports.router)
# Routers mounted under the "/api" prefix; routes_analysis is registered both ways.
app.include_router(routes_analysis.router, prefix="/api")
app.include_router(routes_taxonomy.router, prefix="/api")
app.include_router(routes_taxonomy_workbench.router, prefix="/api")
Expand Down
15 changes: 15 additions & 0 deletions backend/app/schemas/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
from __future__ import annotations

from typing import Any

from pydantic import BaseModel


# Request body accepted by the POST /evaluation/run route.
class EvaluationRunRequest(BaseModel):
# Optional benchmark file path; the evaluation service uses its default
# benchmark when this is None or empty.
benchmark_path: str | None = None


# Typed shape of an evaluation result.
# NOTE(review): not referenced by the visible routes, which return plain dicts;
# presumably mirrors the mapping produced by run_evaluation — confirm.
class EvaluationResponse(BaseModel):
# "items", "metrics" and "disclaimer" are the keys the summary endpoint reads.
items: int
metrics: dict[str, float]
# "errors" is the key the errors endpoint reads from the result.
errors: dict[str, list[dict[str, Any]]]
false_positives: list[dict[str, Any]]
false_negatives: list[dict[str, Any]]
evidence_span_misses: list[dict[str, Any]]
analyses: list[dict[str, Any]]
disclaimer: str
29 changes: 27 additions & 2 deletions backend/app/schemas/reports.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,30 @@
from pydantic import BaseModel
from __future__ import annotations

from typing import Any

from pydantic import BaseModel, Field


# Request body accepted by the POST /reports/from-analysis route.
class ReportFromAnalysisRequest(BaseModel):
# Analysis mapping handed unchanged to the report renderers.
analysis: dict[str, Any]
title: str = "Argument Risk Report"
# Formats to render; defaults to all three. default_factory gives each
# request its own list instead of sharing one mutable default.
formats: list[str] = Field(default_factory=lambda: ["json", "markdown", "html"])


# Metadata-only view of a report; these five fields are the keys the report
# service copies into each reports-index entry.
class ReportSummary(BaseModel):
report_id: str
title: str
created_at: str
analysis_id: str
formats: list[str]


# Full report: metadata plus one optional rendered body per format.
class ReportResponse(BaseModel):
# NOTE(review): the payload built by create_report_from_analysis has no
# "content" key, so this required field would fail validation against it —
# confirm whether the field should remain.
content: str
report_id: str
title: str
created_at: str
analysis_id: str
formats: list[str]
# Rendered bodies; None when that format was not generated.
# NOTE(review): "json" shares its name with a BaseModel attribute; pydantic
# may warn about the shadowing — confirm.
json: str | None = None
markdown: str | None = None
html: str | None = None
37 changes: 37 additions & 0 deletions backend/app/schemas/review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

from typing import Any

from pydantic import BaseModel, Field


# Request body accepted by the POST /review/items route.
class ReviewItemRequest(BaseModel):
# Optional; the review service drops a missing/empty value before building
# the ReviewItem.
review_id: str | None = None
text_id: str
claim_id: str
claim_text: str
# List fields use default_factory so each request gets its own empty list.
predicted_risks: list[dict[str, Any]] = Field(default_factory=list)
reviewer_decision: str
corrected_labels: list[str] = Field(default_factory=list)
corrected_evidence_spans: list[str] = Field(default_factory=list)
reviewer_notes: str = ""


# Stored review item: the request fields, all required, plus created_at.
# NOTE(review): not referenced by the visible routes, which return plain dicts;
# presumably mirrors ReviewItem.model_dump() — confirm.
class ReviewItemResponse(BaseModel):
review_id: str
text_id: str
claim_id: str
claim_text: str
predicted_risks: list[dict[str, Any]]
reviewer_decision: str
corrected_labels: list[str]
corrected_evidence_spans: list[str]
reviewer_notes: str
created_at: str
created_at: str


# Aggregate counts over the stored reviews.
# NOTE(review): presumably mirrors the mapping returned by review_summary();
# not referenced by the visible routes — confirm.
class ReviewSummaryResponse(BaseModel):
total_reviews: int
by_decision: dict[str, int]
corrected_label_counts: dict[str, int]
store_path: str
37 changes: 35 additions & 2 deletions backend/app/services/evaluation_service.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,40 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from argument_risk_engine.evaluation.runner import run_evaluation

from backend.app.core.paths import DATA_DIR

BENCHMARK_PATH = DATA_DIR / "benchmarks" / "mini_eval_set.jsonl"
EVALUATION_RESULT_PATH = DATA_DIR / "evaluation" / "last_evaluation.json"


def evaluate(benchmark_path: str | None = None) -> dict[str, Any]:
    """Run the evaluation and cache its result on disk.

    Args:
        benchmark_path: Benchmark file to evaluate; ``BENCHMARK_PATH`` is used
            when this is ``None`` or empty.

    Returns:
        The result produced by ``run_evaluation``.
    """
    if benchmark_path:
        source = Path(benchmark_path)
    else:
        source = BENCHMARK_PATH
    outcome = run_evaluation(source)

    # Keep the most recent result so later reads need not re-run the benchmark.
    cache_file = EVALUATION_RESULT_PATH
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(outcome, indent=2, ensure_ascii=False)
    cache_file.write_text(serialized, encoding="utf-8")
    return outcome


def evaluation_summary() -> dict[str, Any]:
    """Return the item count, metrics and disclaimer of the latest evaluation.

    Keys missing from the result fall back to ``0``, ``{}`` and ``""``.
    """
    latest = _load_or_run()
    fallbacks = (("items", 0), ("metrics", {}), ("disclaimer", ""))
    return {key: latest.get(key, fallback) for key, fallback in fallbacks}


def evaluation_errors() -> dict[str, Any]:
    """Return the ``errors`` section of the latest evaluation.

    When the result has no ``errors`` key, a mapping with three empty error
    lists is returned instead.
    """
    latest = _load_or_run()
    empty_buckets = {
        "false_positives": [],
        "false_negatives": [],
        "evidence_span_misses": [],
    }
    return latest.get("errors", empty_buckets)


def _load_or_run() -> dict[str, Any]:
    """Return the cached evaluation result, running a fresh evaluation if needed.

    A missing or unparseable cache file triggers ``evaluate()`` with the
    default benchmark, which also rewrites the cache.

    Returns:
        The cached or freshly computed evaluation result.
    """
    try:
        cached_text = EVALUATION_RESULT_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        return evaluate()
    try:
        return json.loads(cached_text)
    except json.JSONDecodeError:
        # A truncated or corrupt cache would otherwise fail every summary and
        # errors request until the file is deleted by hand.
        return evaluate()
82 changes: 82 additions & 0 deletions backend/app/services/report_service.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,89 @@
from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from uuid import uuid4

from argument_risk_engine.reports.html import render_html_report
from argument_risk_engine.reports.json_export import render_json_report
from argument_risk_engine.reports.markdown import render_markdown_report

from backend.app.core.paths import REPORTS_DIR
from backend.app.services.analyzer_service import analyze

INDEX_PATH = REPORTS_DIR / "reports_index.json"
SUPPORTED_FORMATS = {"json", "markdown", "html"}


def create_report_from_analysis(analysis: dict[str, Any], title: str = "Argument Risk Report", formats: list[str] | None = None) -> dict[str, Any]:
    """Render an analysis into the requested formats, persist it and index it.

    Args:
        analysis: Analysis mapping passed to each renderer; its ``analysis_id``
            (or ``text_id``) is recorded, falling back to ``"unknown"``.
        title: Title stored with the report.
        formats: Formats to render. Unsupported names are ignored and
            duplicates collapsed; all three formats are used when this is
            ``None`` or empty, and ``["json"]`` when nothing supported remains.

    Returns:
        The stored report: metadata plus one key per rendered format.
    """
    candidates = formats or ["json", "markdown", "html"]
    # dict.fromkeys de-duplicates while keeping first-seen order, so a request
    # such as ["json", "json"] is recorded as a single format.
    requested = list(dict.fromkeys(fmt for fmt in candidates if fmt in SUPPORTED_FORMATS))
    if not requested:
        requested = ["json"]

    report_id = f"rpt_{uuid4().hex[:12]}"
    created_at = datetime.now(timezone.utc).isoformat()
    analysis_id = str(analysis.get("analysis_id") or analysis.get("text_id") or "unknown")
    payload: dict[str, Any] = {
        "report_id": report_id,
        "title": title,
        "created_at": created_at,
        "analysis_id": analysis_id,
        "formats": requested,
    }

    # Fixed renderer order keeps the stored key order independent of the
    # order in which formats were requested.
    renderers = (
        ("json", render_json_report),
        ("markdown", render_markdown_report),
        ("html", render_html_report),
    )
    for fmt, render in renderers:
        if fmt in requested:
            payload[fmt] = render(analysis)

    _write_report(payload)
    index_keys = ("report_id", "title", "created_at", "analysis_id", "formats")
    _append_index({key: payload[key] for key in index_keys})
    return payload


def list_reports() -> list[dict[str, Any]]:
    """Return the entries currently recorded in the reports index."""
    index_entries = _read_index()
    return index_entries


def get_report(report_id: str) -> dict[str, Any] | None:
    """Load the stored report for ``report_id``, or ``None`` if no file exists."""
    report_file = _report_path(report_id)
    if report_file.exists():
        raw = report_file.read_text(encoding="utf-8")
        return json.loads(raw)
    return None


def get_report_content(report_id: str, report_format: str) -> tuple[str, str, str] | None:
    """Return one rendered format of a stored report, ready for download.

    Args:
        report_id: Identifier of the stored report.
        report_format: One of ``SUPPORTED_FORMATS``.

    Returns:
        ``(body, media_type, filename)``, or ``None`` when the report does not
        exist, the format is unsupported, or that format was not rendered.
    """
    report = get_report(report_id)
    if not report or report_format not in SUPPORTED_FORMATS or not report.get(report_format):
        return None
    media_types = {"json": "application/json", "markdown": "text/markdown", "html": "text/html"}
    extension = "md" if report_format == "markdown" else report_format
    # The lookup above resolves a sanitised form of report_id. Name the file
    # after that same sanitised id, since callers put it in a
    # Content-Disposition header and the raw value may hold quotes or other
    # stray characters.
    safe_id = _report_path(report_id).stem
    return str(report[report_format]), media_types[report_format], f"{safe_id}.{extension}"


def demo_report() -> str:
    """Render a Markdown report for a fixed sample sentence."""
    sample_analysis = analyze("Everyone always caused this problem.")
    return render_markdown_report(sample_analysis)


def _write_report(report: dict[str, Any]) -> None:
    """Write ``report`` as JSON to the file derived from its ``report_id``."""
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    destination = _report_path(str(report["report_id"]))
    serialized = json.dumps(report, indent=2, ensure_ascii=False)
    destination.write_text(serialized, encoding="utf-8")


def _report_path(report_id: str) -> Path:
    """Map ``report_id`` to its JSON file inside ``REPORTS_DIR``.

    Only alphanumeric characters, ``_`` and ``-`` are kept, so path separators
    and dots in the id cannot reach the file name.
    """
    allowed_punctuation = {"_", "-"}
    kept = [ch for ch in report_id if ch.isalnum() or ch in allowed_punctuation]
    file_name = "".join(kept) + ".json"
    return REPORTS_DIR / file_name


def _read_index() -> list[dict[str, Any]]:
    """Load the reports index, returning an empty list when the file is absent."""
    if INDEX_PATH.exists():
        raw = INDEX_PATH.read_text(encoding="utf-8")
        return json.loads(raw)
    return []


def _append_index(summary: dict[str, Any]) -> None:
    """Put ``summary`` at the front of the reports index and rewrite the file.

    Any existing entry with the same ``report_id`` is replaced.
    """
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    others = [entry for entry in _read_index() if entry.get("report_id") != summary["report_id"]]
    updated = [summary, *others]
    serialized = json.dumps(updated, indent=2, ensure_ascii=False)
    INDEX_PATH.write_text(serialized, encoding="utf-8")
31 changes: 29 additions & 2 deletions backend/app/services/review_service.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,34 @@
from argument_risk_engine.review.models import ReviewFeedback
from argument_risk_engine.review.store import append_feedback
from __future__ import annotations

from argument_risk_engine.review.models import ReviewFeedback, ReviewItem
from argument_risk_engine.review.store import (
append_feedback,
append_review_item,
read_review_items,
review_summary,
)

from backend.app.core.paths import REVIEW_STORE_PATH
from backend.app.schemas.review import ReviewItemRequest


def list_review_items() -> list[dict[str, object]]:
    """Return every review item in the store, dumped to a plain dict."""
    stored_items = read_review_items(REVIEW_STORE_PATH)
    return [entry.model_dump() for entry in stored_items]


def create_review_item(payload: ReviewItemRequest | ReviewItem) -> dict[str, object]:
    """Append a review item to the store and return the stored item as a dict.

    A ``ReviewItem`` is stored as given; a ``ReviewItemRequest`` is converted
    to one first.
    """
    if isinstance(payload, ReviewItem):
        return append_review_item(REVIEW_STORE_PATH, payload).model_dump()

    fields = payload.model_dump()
    # Drop a missing/empty review_id rather than passing it on — presumably so
    # ReviewItem supplies its own default; confirm against the model.
    if not fields.get("review_id"):
        fields.pop("review_id", None)
    new_item = ReviewItem(**fields)
    stored = append_review_item(REVIEW_STORE_PATH, new_item)
    return stored.model_dump()


def get_review_summary() -> dict[str, object]:
    """Return the summary computed over the review store."""
    overview = review_summary(REVIEW_STORE_PATH)
    return overview


def record_feedback(feedback: ReviewFeedback) -> dict[str, str]:
Expand Down
3 changes: 2 additions & 1 deletion data/benchmarks/mini_eval_set.jsonl
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
{"text":"They are vermin.","expected":["dehumanizing_language"]}
{"id":"sample_001","text":"They are vermin.","gold_labels":["dehumanizing_language"],"gold_evidence_spans":["vermin"],"difficulty":"easy","notes":"Simple dehumanizing-language smoke test for MVP evaluation."}
{"id":"sample_002","text":"This proposal has tradeoffs and the evidence is mixed.","gold_labels":[],"gold_evidence_spans":[],"difficulty":"easy","notes":"Neutral reasoning sample used to monitor over-classification."}
Loading