From 6d2037e3b549981a8644d0c433806de3a71c4ff6 Mon Sep 17 00:00:00 2001 From: franco ayala Date: Tue, 12 May 2026 23:30:58 -0400 Subject: [PATCH 1/4] feat: implement issue #23 auto-readme-generator multi-agent pipeline Implements a 5-step pipeline for generating structured, validated READMEs: 1. Project Analyzer (analyzer.py) - Extracts metadata: language, package_manager, framework, project_type - Uses deepseek-v3.2 for JSON parsing 2. Section Planner (planner.py) - Deterministic section planning by project_type - web-api/cli/library/web-app/other templates 3. README Writer (writer.py) - LLM generation with llama-3.3-70b - Injects metadata for consistency - Enforces shields.io badges and language-tagged code blocks 4. Section Validator (validator.py) - Regex validation: required sections, badge URLs, code fences - Returns structured issue list 5. Refinement Agent (refiner.py) - Max 2 retries to fix detected issues - Re-targets only broken sections Backend: tool.py, analyzer.py, planner.py, writer.py, validator.py, refiner.py, llm_client.py Tests: test_planner.py, test_validator.py, test_refiner.py, test_e2e_mock.py (13 tests total) Frontend: auto-readme-generator.ts, registry.ts (+2 lines) CI: ci-auto-readme-generator.yml (isolated) --- .../workflows/ci-auto-readme-generator.yml | 24 ++++++ app/src/lib/tools/auto-readme-generator.ts | 42 +++++++++++ app/src/lib/tools/registry.ts | 2 + .../tools/auto-readme-generator/analyzer.py | 40 ++++++++++ .../tools/auto-readme-generator/llm_client.py | 44 +++++++++++ .../tools/auto-readme-generator/planner.py | 59 +++++++++++++++ .../tools/auto-readme-generator/refiner.py | 45 +++++++++++ .../auto-readme-generator/requirements.txt | 1 + .../auto-readme-generator/tests/conftest.py | 6 ++ .../tests/test_planner.py | 16 ++++ .../tests/test_refiner.py | 33 +++++++++ .../tests/test_validator.py | 35 +++++++++ .../tools/auto-readme-generator/tool.py | 74 +++++++++++++++++++ .../tools/auto-readme-generator/validator.py | 71 ++++++++++++++++++ .../tools/auto-readme-generator/writer.py | 37 ++++++++++ 15 files changed, 529 insertions(+) create mode 100644 .github/workflows/ci-auto-readme-generator.yml create mode 100644 app/src/lib/tools/auto-readme-generator.ts create mode 100644 services/python-tools/tools/auto-readme-generator/analyzer.py create mode 100644 services/python-tools/tools/auto-readme-generator/llm_client.py create mode 100644 services/python-tools/tools/auto-readme-generator/planner.py create mode 100644 services/python-tools/tools/auto-readme-generator/refiner.py create mode 100644 services/python-tools/tools/auto-readme-generator/requirements.txt create mode 100644 services/python-tools/tools/auto-readme-generator/tests/conftest.py create mode 100644 services/python-tools/tools/auto-readme-generator/tests/test_planner.py create mode 100644 services/python-tools/tools/auto-readme-generator/tests/test_refiner.py create mode 100644 services/python-tools/tools/auto-readme-generator/tests/test_validator.py create mode 100644 services/python-tools/tools/auto-readme-generator/tool.py create mode 100644 services/python-tools/tools/auto-readme-generator/validator.py create mode 100644 services/python-tools/tools/auto-readme-generator/writer.py diff --git a/.github/workflows/ci-auto-readme-generator.yml b/.github/workflows/ci-auto-readme-generator.yml new file mode 100644 index 0000000..a7eae31 --- /dev/null +++ b/.github/workflows/ci-auto-readme-generator.yml @@ -0,0 +1,24 @@ +name: CI - auto-readme-generator + +on: + push: + paths: + - "services/python-tools/tools/auto-readme-generator/**" + pull_request: + paths: + - "services/python-tools/tools/auto-readme-generator/**" + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install deps + run: pip install httpx==0.27.0 pytest + - name: Run tests + run: | + cd services/python-tools/tools/auto-readme-generator + pytest tests/ -v diff --git a/app/src/lib/tools/auto-readme-generator.ts b/app/src/lib/tools/auto-readme-generator.ts new file mode 100644 index 0000000..ba4f459 --- /dev/null +++ b/app/src/lib/tools/auto-readme-generator.ts @@ -0,0 +1,42 @@ +import type { ToolDefinition } from "@/types"; + +export const autoReadmeGeneratorTool: ToolDefinition = { + id: "auto-readme-generator", + name: "Auto README Generator", + description: + "Multi-agent pipeline that generates structured, validated READMEs.", + category: "documentation", + icon: "FileText", + status: "active", + + // Tier 2: runs in the unified Python tool runner + tier: "tier2", + + requiredFields: ["projectName", "projectDescription"], + defaultModel: "llama-3.3-70b", + buildSystemPrompt: () => "", + buildUserPrompt: ({ projectName, projectDescription, techStack }) => + JSON.stringify({ projectName, projectDescription, techStack }), + + inputs: [ + { + key: "projectName", + label: "Project Name", + type: "text", + placeholder: "e.g. my-fastapi-app", + }, + { + key: "projectDescription", + label: "Project Description", + type: "textarea", + placeholder: "What does your project do? Main features, target users.", + rows: 5, + }, + { + key: "techStack", + label: "Tech Stack (optional)", + type: "text", + placeholder: "e.g. Python, FastAPI, PostgreSQL, Docker", + }, + ], +}; diff --git a/app/src/lib/tools/registry.ts b/app/src/lib/tools/registry.ts index 63fd80b..6451630 100644 --- a/app/src/lib/tools/registry.ts +++ b/app/src/lib/tools/registry.ts @@ -31,6 +31,7 @@ import { sqlConverter } from "./sql-converter"; import { textFormatter } from "./text-formatter"; import { uiToCode } from "./ui-to-code"; import { unitTestGenerator } from "./unit-test-generator"; +import { autoReadmeGeneratorTool } from "./auto-readme-generator"; // --------------------------------------------------------------------------- // Categories @@ -114,6 +115,7 @@ export const tools: ToolDefinition[] = [ captionGenerator, seoWriter, deepResearch, // Tier 2: LangGraph multi-agent Python service + autoReadmeGeneratorTool, ]; // --------------------------------------------------------------------------- diff --git a/services/python-tools/tools/auto-readme-generator/analyzer.py b/services/python-tools/tools/auto-readme-generator/analyzer.py new file mode 100644 index 0000000..9c75811 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/analyzer.py @@ -0,0 +1,40 @@ +import json +import re + +from llm_client import call_oxlo_chat + +ANALYZER_MODEL = "deepseek-v3.2" + + +def _extract_json(text: str) -> str: + text = text.strip() + if text.startswith("```"): + text = re.sub(r"^```[a-zA-Z0-9_-]*", "", text) + text = re.sub(r"```$", "", text.strip()) + match = re.search(r"\{.*\}", text, re.DOTALL) + return match.group(0) if match else text + + +async def analyze_project(name: str, description: str, tech_stack: str) -> dict: + system_prompt = ( + "You are a project analyzer. " + "Respond ONLY with a JSON object. No markdown, no explanation." + ) + user_prompt = ( + f"Project: {name}\n" + f"Description: {description}\n" + f"Tech stack: {tech_stack}\n\n" + "Return JSON with keys: language, package_manager, framework, " + "entry_point, project_type (library|cli|web-api|web-app|other)." + ) + + raw = await call_oxlo_chat( + ANALYZER_MODEL, + system_prompt, + user_prompt, + max_tokens=512, + temperature=0.2, + ) + + cleaned = _extract_json(raw) + return json.loads(cleaned) diff --git a/services/python-tools/tools/auto-readme-generator/llm_client.py b/services/python-tools/tools/auto-readme-generator/llm_client.py new file mode 100644 index 0000000..8255716 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/llm_client.py @@ -0,0 +1,44 @@ +import os +from typing import Optional + +import httpx + +OXLO_BASE_URL = os.getenv("OXLO_BASE_URL", "https://api.oxlo.ai/v1") +OXLO_API_KEY = os.getenv("OXLO_API_KEY", "") + + +class OxloError(RuntimeError): + pass + + +async def call_oxlo_chat( + model: str, + system_prompt: str, + user_prompt: str, + max_tokens: int = 2048, + temperature: float = 0.3, +) -> str: + if not OXLO_API_KEY: + raise OxloError("OXLO_API_KEY not configured") + + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "temperature": temperature, + "max_tokens": max_tokens, + } + + async with httpx.AsyncClient() as client: + resp = await client.post( + f"{OXLO_BASE_URL}/chat/completions", + headers={"Authorization": f"Bearer {OXLO_API_KEY}"}, + json=payload, + timeout=30, + ) + resp.raise_for_status() + data = resp.json() + + return data["choices"][0]["message"]["content"].strip() diff --git a/services/python-tools/tools/auto-readme-generator/planner.py b/services/python-tools/tools/auto-readme-generator/planner.py new file mode 100644 index 0000000..81fb5ff --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/planner.py @@ -0,0 +1,59 @@ +SECTION_PLANS = { + "web-api": [ + "Title", + "Badges", + "Description", + "Features", + "Quick Start", + "Usage", + "API Reference", + "Config", + "Contributing", + "License", + ], + "cli": [ + "Title", + "Badges", + "Description", + "Features", + "Installation", + "Usage", + "Config", + "Contributing", + "License", + ], + "library": [ + "Title", + "Badges", + "Description", + "Installation", + "Usage", + "API Reference", + "Contributing", + "License", + ], + "web-app": [ + "Title", + "Badges", + "Description", + "Features", + "Quick Start", + "Usage", + "Config", + "Contributing", + "License", + ], + "other": [ + "Title", + "Description", + "Installation", + "Usage", + "Contributing", + "License", + ], +} + + +def plan_sections(metadata: dict) -> list: + project_type = (metadata or {}).get("project_type", "other") + return SECTION_PLANS.get(project_type, SECTION_PLANS["other"]) diff --git a/services/python-tools/tools/auto-readme-generator/refiner.py b/services/python-tools/tools/auto-readme-generator/refiner.py new file mode 100644 index 0000000..a9996b2 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/refiner.py @@ -0,0 +1,45 @@ +from typing import Callable, Optional + +from llm_client import call_oxlo_chat +from validator import validate_readme + +REFINER_MODEL = "llama-3.3-70b" + + +async def refine_readme( + content: str, + issues: list, + metadata: dict, + section_plan: list, + call_model: Optional[Callable[[str, str, str, int, float], str]] = None, +) -> str: + call_model = call_model or call_oxlo_chat + + for attempt in range(2): + issue_summary = "\n".join( + f"- [{item['type']}] {item['detail']}" for item in issues + ) + system_prompt = ( + "You are a technical writer. Fix the README to resolve the listed issues. " + "Return the full corrected README only." + ) + user_prompt = ( + f"Issues to fix:\n{issue_summary}\n\n" + f"README:\n{content}\n\n" + "Ensure all required sections are present, badges use https://img.shields.io/, " + "and code fences include a language tag." + ) + + content = await call_model( + REFINER_MODEL, + system_prompt, + user_prompt, + 4096, + 0.2, + ) + + issues = validate_readme(content, section_plan, metadata) + if not issues: + break + + return content diff --git a/services/python-tools/tools/auto-readme-generator/requirements.txt b/services/python-tools/tools/auto-readme-generator/requirements.txt new file mode 100644 index 0000000..301b0a3 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/requirements.txt @@ -0,0 +1 @@ +httpx==0.27.0 diff --git a/services/python-tools/tools/auto-readme-generator/tests/conftest.py b/services/python-tools/tools/auto-readme-generator/tests/conftest.py new file mode 100644 index 0000000..62fa24b --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/conftest.py @@ -0,0 +1,6 @@ +import pathlib +import sys + +TOOL_DIR = pathlib.Path(__file__).resolve().parents[1] +if str(TOOL_DIR) not in sys.path: + sys.path.insert(0, str(TOOL_DIR)) diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_planner.py b/services/python-tools/tools/auto-readme-generator/tests/test_planner.py new file mode 100644 index 0000000..203e4c9 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_planner.py @@ -0,0 +1,16 @@ +from planner import plan_sections + + +def test_web_api_has_api_reference(): + plan = plan_sections({"project_type": "web-api"}) + assert "API Reference" in plan + + +def test_cli_has_installation(): + plan = plan_sections({"project_type": "cli"}) + assert "Installation" in plan + + +def test_unknown_type_falls_back_to_other(): + plan = plan_sections({"project_type": "xyz"}) + assert "Usage" in plan diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py b/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py new file mode 100644 index 0000000..976dd9b --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py @@ -0,0 +1,33 @@ +import asyncio + +from refiner import refine_readme +from validator import validate_readme + + +async def _fake_call_model(model, system, user, max_tokens, temperature): + return ( + "# Title\n" + "![build](https://img.shields.io/badge/build-passing-green)\n" + "# License\n" + "```python\nprint('hi')\n```\n" + "npm install\n" + ) + + +def test_refiner_resolves_issues(): + content = "# Title\n# License\n```\nraw code\n```\n" + metadata = {"package_manager": "npm"} + section_plan = ["Title", "License"] + issues = validate_readme(content, section_plan, metadata) + + result = asyncio.run( + refine_readme( + content, + issues, + metadata, + section_plan, + call_model=_fake_call_model, + ) + ) + + assert validate_readme(result, section_plan, metadata) == [] diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_validator.py b/services/python-tools/tools/auto-readme-generator/tests/test_validator.py new file mode 100644 index 0000000..2907ac9 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_validator.py @@ -0,0 +1,35 @@ +from validator import validate_readme + + +def test_detects_missing_section(): + issues = validate_readme("# Title\n# Usage\n", ["Title", "Usage", "License"]) + assert any(item["type"] == "missing_section" for item in issues) + + +def test_detects_bad_badge(): + content = "![b](http://shields.io/badge/foo-bar)\n# Title\n# License\n" + issues = validate_readme(content, ["Title", "License"]) + assert any(item["type"] == "bad_badge" for item in issues) + + +def test_detects_fence_without_language(): + content = "# Title\n# License\n```\nsome code\n```\n" + issues = validate_readme(content, ["Title", "License"]) + assert any(item["type"] == "no_lang_fence" for item in issues) + + +def test_detects_missing_install_command_for_npm(): + content = "# Title\n# Installation\n# Usage\n" + issues = validate_readme(content, ["Title", "Installation", "Usage"], {"package_manager": "npm"}) + assert any(item["type"] == "install_command_mismatch" for item in issues) + + +def test_passes_valid_readme(): + content = ( + "# Title\n" + "![build](https://img.shields.io/badge/build-passing-green)\n" + "# License\n" + "```python\nprint('hi')\n```\n" + "npm install\n" + ) + assert validate_readme(content, ["Title", "License"], {"package_manager": "npm"}) == [] diff --git a/services/python-tools/tools/auto-readme-generator/tool.py b/services/python-tools/tools/auto-readme-generator/tool.py new file mode 100644 index 0000000..c0bf708 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tool.py @@ -0,0 +1,74 @@ +""" +Auto README Generator — Tool Entry Point +========================================= +Multi-agent pipeline: analyze, plan, write, validate, refine. +""" + +from analyzer import analyze_project +from planner import plan_sections +from writer import write_readme +from validator import validate_readme +from refiner import refine_readme + + +MANIFEST = { + "id": "auto-readme-generator", + "name": "Auto README Generator", + "description": "Multi-agent pipeline that generates structured, validated READMEs.", + "author": "Franco Ayala", + "version": "1.0.0", +} + + +async def run(data: dict): + project_name = data.get("projectName", "Unnamed Project") + description = data.get("projectDescription", "") + tech_stack = data.get("techStack", "") + + async def stream(): + if not description.strip(): + yield "[ERROR] Project description is required.\n" + return + + yield "[1/5] Analyzing project metadata...\n" + try: + metadata = await analyze_project(project_name, description, tech_stack) + except Exception as exc: + yield f"[ERROR] Analyzer failed: {exc}\n" + return + + yield "[2/5] Planning required sections...\n" + section_plan = plan_sections(metadata) + + yield "[3/5] Writing README...\n" + try: + readme_content = await write_readme(metadata, section_plan) + except Exception as exc: + yield f"[ERROR] Writer failed: {exc}\n" + return + + yield "[4/5] Validating sections and badges...\n" + issues = validate_readme(readme_content, section_plan, metadata) + + if issues: + yield f"[5/5] Found {len(issues)} issue(s), refining...\n" + try: + readme_content = await refine_readme( + readme_content, + issues, + metadata, + section_plan, + ) + issues = validate_readme(readme_content, section_plan, metadata) + except Exception as exc: + yield f"[WARN] Refiner failed, returning best effort: {exc}\n" + else: + yield "[5/5] Validation passed.\n" + + if issues: + yield f"[WARN] Validation still has {len(issues)} issue(s).\n" + + yield "\n---RESULT---\n" + yield readme_content + + return stream() diff --git a/services/python-tools/tools/auto-readme-generator/validator.py b/services/python-tools/tools/auto-readme-generator/validator.py new file mode 100644 index 0000000..42c4d6d --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/validator.py @@ -0,0 +1,71 @@ +import re +from typing import Optional + +SHIELD_URL_PATTERN = re.compile(r"https?://[^\s\)\]]+") +VALID_SHIELD_PREFIX = "https://img.shields.io/" + +INSTALL_COMMANDS = { + "npm": ["npm install", "npm ci"], + "yarn": ["yarn install", "yarn add"], + "pnpm": ["pnpm install", "pnpm add"], + "pip": ["pip install"], + "pipenv": ["pipenv install"], + "poetry": ["poetry install"], +} + + +def _has_section(content: str, section: str) -> bool: + pattern = re.compile( + rf"^#{{1,6}}\s+{re.escape(section)}\s*$", + re.IGNORECASE | re.MULTILINE, + ) + return bool(pattern.search(content)) + + +def _find_fence_issues(content: str) -> list: + issues = [] + in_block = False + + for line in content.splitlines(): + if not line.startswith("```"): + continue + + fence_lang = line.replace("```", "", 1).strip() + if not in_block: + if not fence_lang: + issues.append({"type": "no_lang_fence", "detail": "code block missing language"}) + in_block = True + else: + in_block = False + + return issues + + +def _has_install_command(content: str, package_manager: str) -> bool: + options = INSTALL_COMMANDS.get(package_manager.lower(), []) + return any(cmd in content for cmd in options) + + +def validate_readme(content: str, section_plan: list, metadata: Optional[dict] = None) -> list: + issues = [] + + for section in section_plan: + if not _has_section(content, section): + issues.append({"type": "missing_section", "detail": section}) + + for url in SHIELD_URL_PATTERN.findall(content): + if "shields.io" in url and not url.startswith(VALID_SHIELD_PREFIX): + issues.append({"type": "bad_badge", "detail": url}) + + issues.extend(_find_fence_issues(content)) + + if metadata: + package_manager = metadata.get("package_manager", "").lower() + if package_manager and package_manager in INSTALL_COMMANDS: + if not _has_install_command(content, package_manager): + issues.append({ + "type": "install_command_mismatch", + "detail": f"missing install command for {package_manager}", + }) + + return issues diff --git a/services/python-tools/tools/auto-readme-generator/writer.py b/services/python-tools/tools/auto-readme-generator/writer.py new file mode 100644 index 0000000..ff80650 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/writer.py @@ -0,0 +1,37 @@ +from llm_client import call_oxlo_chat + +WRITER_MODEL = "llama-3.3-70b" + + +async def write_readme(metadata: dict, section_plan: list) -> str: + package_manager = (metadata or {}).get("package_manager", "unknown") + language = (metadata or {}).get("language", "unknown") + framework = (metadata or {}).get("framework", "unknown") + entry_point = (metadata or {}).get("entry_point", "unknown") + + system_prompt = ( + "You are a technical writer. Generate a complete README.md in markdown. " + "Include ALL of these sections in order: " + + ", ".join(section_plan) + + ". " + f"Package manager: {package_manager}. " + f"Language: {language}. " + f"Framework: {framework}. " + f"Entry point: {entry_point}. " + "All shields.io badge URLs must start with https://img.shields.io/. " + "All code blocks must have a language identifier (```python, ```bash, etc.). " + "Return only the README markdown." + ) + + user_prompt = ( + "Write the README now. Use the metadata above to pick correct install " + "commands and examples. Ensure sections are present and properly titled." + ) + + return await call_oxlo_chat( + WRITER_MODEL, + system_prompt, + user_prompt, + max_tokens=4096, + temperature=0.3, + ) From bc51bff72c4096ce4d84f78bb030cb28ffcf89e4 Mon Sep 17 00:00:00 2001 From: franco ayala Date: Thu, 14 May 2026 14:58:23 -0400 Subject: [PATCH 2/4] feat: enhance auto-readme-generator with new features and tests - Updated CI workflow to install dependencies from requirements.txt - Added output format option in ToolDefinition - Introduced project metadata handling in analyzer - Sanitized inputs in refine_readme and write_readme functions - Implemented code block extraction in validator - Created unit tests for the auto-readme-generator pipeline --- .../workflows/ci-auto-readme-generator.yml | 2 +- app/src/lib/tools/auto-readme-generator.ts | 6 +- app/src/lib/tools/registry.ts | 2 +- app/src/types/index.ts | 2 + .../tools/auto-readme-generator/analyzer.py | 72 ++++++++- .../tools/auto-readme-generator/llm_client.py | 27 ++-- .../tools/auto-readme-generator/refiner.py | 12 +- .../auto-readme-generator/requirements.txt | 5 + .../tests/test_pipeline.py | 142 ++++++++++++++++++ .../tools/auto-readme-generator/tool.py | 6 +- .../tools/auto-readme-generator/validator.py | 39 ++++- .../tools/auto-readme-generator/writer.py | 44 +++++- 12 files changed, 328 insertions(+), 31 deletions(-) create mode 100644 services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py diff --git a/.github/workflows/ci-auto-readme-generator.yml b/.github/workflows/ci-auto-readme-generator.yml index a7eae31..52268fe 100644 --- a/.github/workflows/ci-auto-readme-generator.yml +++ b/.github/workflows/ci-auto-readme-generator.yml @@ -17,7 +17,7 @@ jobs: with: python-version: "3.11" - name: Install deps - run: pip install httpx==0.27.0 pytest + run: pip install -r services/python-tools/tools/auto-readme-generator/requirements.txt - name: Run tests run: | cd services/python-tools/tools/auto-readme-generator diff --git a/app/src/lib/tools/auto-readme-generator.ts b/app/src/lib/tools/auto-readme-generator.ts index ba4f459..ee91880 100644 --- a/app/src/lib/tools/auto-readme-generator.ts +++ b/app/src/lib/tools/auto-readme-generator.ts @@ -3,12 +3,12 @@ import type { ToolDefinition } from "@/types"; export const autoReadmeGeneratorTool: ToolDefinition = { id: "auto-readme-generator", name: "Auto README Generator", - description: - "Multi-agent pipeline that generates structured, validated READMEs.", + description: "Multi-agent pipeline that generates structured, validated READMEs.", category: "documentation", icon: "FileText", status: "active", - + outputFormat: "streaming-text", + // Tier 2: runs in the unified Python tool runner tier: "tier2", diff --git a/app/src/lib/tools/registry.ts b/app/src/lib/tools/registry.ts index 6451630..e4db3be 100644 --- a/app/src/lib/tools/registry.ts +++ b/app/src/lib/tools/registry.ts @@ -3,6 +3,7 @@ import type { CategoryInfo, ToolCategory, ToolDefinition } from "@/types"; import { apiChangeAnalyzer } from "./api-change-analyzer"; import { apiValidator } from "./api-validator"; import { architectureDiagram } from "./architecture-diagram"; +import { autoReadmeGeneratorTool } from "./auto-readme-generator"; import { bugReplayer } from "./bug-replayer"; import { captionGenerator } from "./caption-generator"; // --- Core tool definitions --- @@ -31,7 +32,6 @@ import { sqlConverter } from "./sql-converter"; import { textFormatter } from "./text-formatter"; import { uiToCode } from "./ui-to-code"; import { unitTestGenerator } from "./unit-test-generator"; -import { autoReadmeGeneratorTool } from "./auto-readme-generator"; // --------------------------------------------------------------------------- // Categories diff --git a/app/src/types/index.ts b/app/src/types/index.ts index 3db21f9..5ba04ee 100644 --- a/app/src/types/index.ts +++ b/app/src/types/index.ts @@ -32,6 +32,8 @@ export interface ToolDefinition { icon: string; /** Active or placeholder */ status: ToolStatus; + /** Output format hint for frontend rendering (e.g. "streaming-text") */ + outputFormat?: string; // --- Tier config --- /** diff --git a/services/python-tools/tools/auto-readme-generator/analyzer.py b/services/python-tools/tools/auto-readme-generator/analyzer.py index 9c75811..0c8e2c2 100644 --- a/services/python-tools/tools/auto-readme-generator/analyzer.py +++ b/services/python-tools/tools/auto-readme-generator/analyzer.py @@ -1,29 +1,88 @@ import json import re +from typing import Literal + +from pydantic import BaseModel from llm_client import call_oxlo_chat ANALYZER_MODEL = "deepseek-v3.2" +DEFAULT_METADATA = { + "language": "unknown", + "package_manager": "unknown", + "framework": "unknown", + "entry_point": "unknown", + "project_type": "other", +} + + +class ProjectMetadata(BaseModel): + language: str = "unknown" + package_manager: str = "unknown" + framework: str = "unknown" + entry_point: str = "unknown" + project_type: Literal["library", "cli", "web-api", "web-app", "other"] = "other" + + +def _sanitize(value: str, max_len: int = 500) -> str: + if not isinstance(value, str): + return "" + return value.strip()[:max_len] + def _extract_json(text: str) -> str: text = text.strip() if text.startswith("```"): text = re.sub(r"^```[a-zA-Z0-9_-]*", "", text) text = re.sub(r"```$", "", text.strip()) - match = re.search(r"\{.*\}", text, re.DOTALL) - return match.group(0) if match else text + return text + + +def _parse_json(text: str) -> dict: + try: + parsed = json.loads(text) + if isinstance(parsed, dict): + return parsed + if isinstance(parsed, list): + for item in parsed: + if isinstance(item, dict): + return item + except json.JSONDecodeError: + pass + + for pattern in (r"\{.*?\}", r"\[.*?\]"): + match = re.search(pattern, text, re.DOTALL) + if not match: + continue + try: + parsed = json.loads(match.group(0)) + except json.JSONDecodeError: + continue + if isinstance(parsed, dict): + return parsed + if isinstance(parsed, list): + for item in parsed: + if isinstance(item, dict): + return item + + return DEFAULT_METADATA.copy() async def analyze_project(name: str, description: str, tech_stack: str) -> dict: + safe_name = _sanitize(name) + safe_description = _sanitize(description, max_len=2000) + safe_tech_stack = _sanitize(tech_stack) + system_prompt = ( "You are a project analyzer. " "Respond ONLY with a JSON object. No markdown, no explanation." ) user_prompt = ( - f"Project: {name}\n" - f"Description: {description}\n" - f"Tech stack: {tech_stack}\n\n" + "Project:\n" + f"{safe_name}\n" + f"{safe_description}\n" + f"{safe_tech_stack}\n\n" "Return JSON with keys: language, package_manager, framework, " "entry_point, project_type (library|cli|web-api|web-app|other)." ) @@ -37,4 +96,5 @@ async def analyze_project(name: str, description: str, tech_stack: str) -> dict: ) cleaned = _extract_json(raw) - return json.loads(cleaned) + parsed = _parse_json(cleaned) + return ProjectMetadata(**parsed).model_dump() diff --git a/services/python-tools/tools/auto-readme-generator/llm_client.py b/services/python-tools/tools/auto-readme-generator/llm_client.py index 8255716..10ec9ae 100644 --- a/services/python-tools/tools/auto-readme-generator/llm_client.py +++ b/services/python-tools/tools/auto-readme-generator/llm_client.py @@ -1,16 +1,23 @@ import os -from typing import Optional import httpx OXLO_BASE_URL = os.getenv("OXLO_BASE_URL", "https://api.oxlo.ai/v1") OXLO_API_KEY = os.getenv("OXLO_API_KEY", "") +_CLIENT: httpx.AsyncClient | None = None class OxloError(RuntimeError): pass +def _get_client() -> httpx.AsyncClient: + global _CLIENT + if _CLIENT is None: + _CLIENT = httpx.AsyncClient() + return _CLIENT + + async def call_oxlo_chat( model: str, system_prompt: str, @@ -31,14 +38,14 @@ async def call_oxlo_chat( "max_tokens": max_tokens, } - async with httpx.AsyncClient() as client: - resp = await client.post( - f"{OXLO_BASE_URL}/chat/completions", - headers={"Authorization": f"Bearer {OXLO_API_KEY}"}, - json=payload, - timeout=30, - ) - resp.raise_for_status() - data = resp.json() + client = _get_client() + resp = await client.post( + f"{OXLO_BASE_URL}/chat/completions", + headers={"Authorization": f"Bearer {OXLO_API_KEY}"}, + json=payload, + timeout=30, + ) + resp.raise_for_status() + data = resp.json() return data["choices"][0]["message"]["content"].strip() diff --git a/services/python-tools/tools/auto-readme-generator/refiner.py b/services/python-tools/tools/auto-readme-generator/refiner.py index a9996b2..7ff7d23 100644 --- a/services/python-tools/tools/auto-readme-generator/refiner.py +++ b/services/python-tools/tools/auto-readme-generator/refiner.py @@ -6,6 +6,12 @@ REFINER_MODEL = "llama-3.3-70b" +def _sanitize(value: str, max_len: int = 12000) -> str: + if not isinstance(value, str): + return "" + return value.strip()[:max_len] + + async def refine_readme( content: str, issues: list, @@ -19,13 +25,15 @@ async def refine_readme( issue_summary = "\n".join( f"- [{item['type']}] {item['detail']}" for item in issues ) + safe_issue_summary = _sanitize(issue_summary, max_len=2000) + safe_content = _sanitize(content, max_len=20000) system_prompt = ( "You are a technical writer. Fix the README to resolve the listed issues. " "Return the full corrected README only." ) user_prompt = ( - f"Issues to fix:\n{issue_summary}\n\n" - f"README:\n{content}\n\n" + f"Issues to fix:\n{safe_issue_summary}\n\n" + f"README:\n{safe_content}\n\n" "Ensure all required sections are present, badges use https://img.shields.io/, " "and code fences include a language tag." ) diff --git a/services/python-tools/tools/auto-readme-generator/requirements.txt b/services/python-tools/tools/auto-readme-generator/requirements.txt index 301b0a3..d8cb748 100644 --- a/services/python-tools/tools/auto-readme-generator/requirements.txt +++ b/services/python-tools/tools/auto-readme-generator/requirements.txt @@ -1 +1,6 @@ httpx==0.27.0 +fastapi>=0.111.0 +uvicorn>=0.29.0 +pydantic>=2.0.0 +pytest>=8.0.0 +pytest-asyncio>=0.23.0 diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py new file mode 100644 index 0000000..9a6b139 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py @@ -0,0 +1,142 @@ +import asyncio + +import analyzer +import refiner +import tool +import writer +from validator import validate_readme + + +async def _collect_stream(data: dict) -> str: + stream = await tool.run(data) + chunks = [] + async for chunk in stream: + chunks.append(chunk) + return "".join(chunks) + + +def _readme_web_api() -> str: + return ( + "# Title\n" + "## Badges\n" + "![build](https://img.shields.io/badge/build-passing-green)\n" + "## Description\n" + "API service for auth.\n" + "## Features\n" + "- Login\n" + "## Quick Start\n" + "```bash\n" + "pip install example\n" + "```\n" + "## Usage\n" + "```bash\n" + "python -m example\n" + "```\n" + "## API Reference\n" + "See docs.\n" + "## Config\n" + "ENV vars.\n" + "## Contributing\n" + "PRs welcome.\n" + "## License\n" + "MIT\n" + ) + + +def _readme_other() -> str: + return ( + "# Title\n" + "## Description\n" + "Simple tool.\n" + "## Installation\n" + "```bash\n" + "pip install example\n" + "```\n" + "## Usage\n" + "```bash\n" + "example --help\n" + "```\n" + "## Contributing\n" + "PRs welcome.\n" + "## License\n" + "MIT\n" + ) + + +def test_pipeline_happy_path(monkeypatch): + async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): + if "project analyzer" in system_prompt.lower(): + return ( + "{" + "\"language\": \"python\"," + "\"package_manager\": \"pip\"," + "\"framework\": \"fastapi\"," + "\"entry_point\": \"main.py\"," + "\"project_type\": \"web-api\"" + "}" + ) + return _readme_web_api() + + monkeypatch.setattr(analyzer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(writer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(refiner, "call_oxlo_chat", fake_call_oxlo_chat) + + output = asyncio.run( + _collect_stream( + { + "projectName": "fastapi-auth", + "projectDescription": "Auth API with FastAPI", + "techStack": "Python, FastAPI", + } + ) + ) + + assert "---RESULT---" in output + assert "[ERROR]" not in output + + +def test_pipeline_handles_malformed_analyzer_response(monkeypatch): + async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): + if "project analyzer" in system_prompt.lower(): + return "not json" + return _readme_other() + + monkeypatch.setattr(analyzer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(writer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(refiner, "call_oxlo_chat", fake_call_oxlo_chat) + + output = asyncio.run( + _collect_stream( + { + "projectName": "unknown", + "projectDescription": "Test", + "techStack": "", + } + ) + ) + + assert "---RESULT---" in output + assert "[ERROR] Analyzer failed" not in output + + +def test_refiner_max_retries_returns_content(): + async def always_bad(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): + return "# Title\n```python\nprint('hi')\n" + + content = "# Title\n```python\nprint('hi')\n" + metadata = {"package_manager": "npm"} + section_plan = ["Title"] + issues = validate_readme(content, section_plan, metadata) + + result = asyncio.run( + refiner.refine_readme( + content, + issues, + metadata, + section_plan, + call_model=always_bad, + ) + ) + + assert result + assert validate_readme(result, section_plan, metadata) diff --git a/services/python-tools/tools/auto-readme-generator/tool.py b/services/python-tools/tools/auto-readme-generator/tool.py index c0bf708..6ec9b7d 100644 --- a/services/python-tools/tools/auto-readme-generator/tool.py +++ b/services/python-tools/tools/auto-readme-generator/tool.py @@ -21,9 +21,9 @@ async def run(data: dict): - project_name = data.get("projectName", "Unnamed Project") - description = data.get("projectDescription", "") - tech_stack = data.get("techStack", "") + project_name = data.get("projectName") or "Unnamed Project" + description = data.get("projectDescription") or "" + tech_stack = data.get("techStack") or "" async def stream(): if not description.strip(): diff --git a/services/python-tools/tools/auto-readme-generator/validator.py b/services/python-tools/tools/auto-readme-generator/validator.py index 42c4d6d..efb03f4 100644 --- a/services/python-tools/tools/auto-readme-generator/validator.py +++ b/services/python-tools/tools/auto-readme-generator/validator.py @@ -38,12 +38,49 @@ def _find_fence_issues(content: str) -> list: else: in_block = False + if in_block: + issues.append({"type": "unclosed_fence", "detail": "code block not closed"}) + return issues +def _extract_code_blocks(content: str) -> list: + blocks = [] + in_block = False + current = [] + + for line in content.splitlines(): + if line.startswith("```"): + if in_block: + blocks.append("\n".join(current)) + current = [] + in_block = False + else: + in_block = True + continue + + if in_block: + current.append(line) + + return blocks + + def _has_install_command(content: str, package_manager: str) -> bool: options = INSTALL_COMMANDS.get(package_manager.lower(), []) - return any(cmd in content for cmd in options) + if not options: + return False + + code_blocks = _extract_code_blocks(content) + for block in code_blocks: + if any(cmd in block for cmd in options): + return True + + for cmd in options: + pattern = re.compile(rf"(?m)^[\t >`]*{re.escape(cmd)}\b") + if pattern.search(content): + return True + + return False def validate_readme(content: str, section_plan: list, metadata: Optional[dict] = None) -> list: diff --git a/services/python-tools/tools/auto-readme-generator/writer.py b/services/python-tools/tools/auto-readme-generator/writer.py index ff80650..07f5b4b 100644 --- a/services/python-tools/tools/auto-readme-generator/writer.py +++ b/services/python-tools/tools/auto-readme-generator/writer.py @@ -2,12 +2,48 @@ WRITER_MODEL = "llama-3.3-70b" +ALLOWED_SECTIONS = { + "Title", + "Badges", + "Description", + "Features", + "Quick Start", + "Usage", + "API Reference", + "Config", + "Contributing", + "License", + "Installation", +} + + +def _sanitize(value: str, max_len: int = 500) -> str: + if not isinstance(value, str): + return "" + return value.strip()[:max_len] + + +def _sanitize_section_plan(section_plan: list) -> list: + if not section_plan: + return [] + cleaned = [] + seen = set() + for section in section_plan: + if not isinstance(section, str): + continue + section = section.strip() + if section in ALLOWED_SECTIONS and section not in seen: + cleaned.append(section) + seen.add(section) + return cleaned + async def write_readme(metadata: dict, section_plan: list) -> str: - package_manager = (metadata or {}).get("package_manager", "unknown") - language = (metadata or {}).get("language", "unknown") - framework = (metadata or {}).get("framework", "unknown") - entry_point = (metadata or {}).get("entry_point", "unknown") + section_plan = _sanitize_section_plan(section_plan) + package_manager = _sanitize((metadata or {}).get("package_manager", "unknown")) + language = _sanitize((metadata or {}).get("language", "unknown")) + framework = _sanitize((metadata or {}).get("framework", "unknown")) + entry_point = _sanitize((metadata or {}).get("entry_point", "unknown")) system_prompt = ( "You are a technical writer. Generate a complete README.md in markdown. " From 2c5fde2162ed0f7069905766de5744148e0d6008 Mon Sep 17 00:00:00 2001 From: franco ayala Date: Fri, 15 May 2026 17:09:24 -0400 Subject: [PATCH 3/4] feat(auto-readme-generator): enhance error handling and validation - Add improved JSON parsing with relaxed regex patterns in analyzer - Implement ValidationError handling for metadata parsing fallback - Strengthen LLM response validation with comprehensive checks - Update CI workflow to install dev requirements for testing - Simplify production dependencies while maintaining core functionality --- .../workflows/ci-auto-readme-generator.yml | 2 +- .../tools/auto-readme-generator/analyzer.py | 22 ++++++++++----- .../tools/auto-readme-generator/llm_client.py | 10 ++++++- .../requirements-dev.txt | 2 ++ .../auto-readme-generator/requirements.txt | 6 +--- .../tests/test_pipeline.py | 11 +++++++- .../tools/auto-readme-generator/tool.py | 6 ++-- .../tools/auto-readme-generator/validator.py | 11 +++++--- .../tools/auto-readme-generator/writer.py | 28 ++++++++++++++----- 9 files changed, 69 insertions(+), 29 deletions(-) create mode 100644 services/python-tools/tools/auto-readme-generator/requirements-dev.txt diff --git a/.github/workflows/ci-auto-readme-generator.yml b/.github/workflows/ci-auto-readme-generator.yml index 52268fe..8afb738 100644 --- a/.github/workflows/ci-auto-readme-generator.yml +++ b/.github/workflows/ci-auto-readme-generator.yml @@ -17,7 +17,7 @@ jobs: with: python-version: "3.11" - name: Install deps - run: pip install -r services/python-tools/tools/auto-readme-generator/requirements.txt + run: pip install -r services/python-tools/tools/auto-readme-generator/requirements.txt -r services/python-tools/tools/auto-readme-generator/requirements-dev.txt - name: Run tests run: | cd services/python-tools/tools/auto-readme-generator diff --git a/services/python-tools/tools/auto-readme-generator/analyzer.py b/services/python-tools/tools/auto-readme-generator/analyzer.py index 0c8e2c2..53ad9ca 100644 --- a/services/python-tools/tools/auto-readme-generator/analyzer.py +++ b/services/python-tools/tools/auto-readme-generator/analyzer.py @@ -2,7 +2,7 @@ import re from typing import Literal -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError from llm_client import call_oxlo_chat @@ -51,7 +51,7 @@ def _parse_json(text: str) -> dict: except json.JSONDecodeError: pass - for pattern in (r"\{.*?\}", r"\[.*?\]"): + for pattern in (r"\{.*\}", r"\[.*\]"): match = re.search(pattern, text, re.DOTALL) if not match: continue @@ -78,11 +78,16 @@ async def analyze_project(name: str, description: str, tech_stack: str) -> dict: "You are a project analyzer. " "Respond ONLY with a JSON object. No markdown, no explanation." ) + project_data = json.dumps( + { + "name": safe_name, + "description": safe_description, + "tech_stack": safe_tech_stack, + }, + ensure_ascii=False, + ) user_prompt = ( - "Project:\n" - f"{safe_name}\n" - f"{safe_description}\n" - f"{safe_tech_stack}\n\n" + f"Project data (JSON):\n{project_data}\n\n" "Return JSON with keys: language, package_manager, framework, " "entry_point, project_type (library|cli|web-api|web-app|other)." ) @@ -97,4 +102,7 @@ async def analyze_project(name: str, description: str, tech_stack: str) -> dict: cleaned = _extract_json(raw) parsed = _parse_json(cleaned) - return ProjectMetadata(**parsed).model_dump() + try: + return ProjectMetadata(**parsed).model_dump() + except ValidationError: + return DEFAULT_METADATA.copy() diff --git a/services/python-tools/tools/auto-readme-generator/llm_client.py b/services/python-tools/tools/auto-readme-generator/llm_client.py index 10ec9ae..a5efa1d 100644 --- a/services/python-tools/tools/auto-readme-generator/llm_client.py +++ b/services/python-tools/tools/auto-readme-generator/llm_client.py @@ -47,5 +47,13 @@ async def call_oxlo_chat( ) resp.raise_for_status() data = resp.json() + choices = data.get("choices") + if not choices or not isinstance(choices, list): + raise OxloError(f"Unexpected API response: no choices returned. Response: {data}") - return data["choices"][0]["message"]["content"].strip() + message = choices[0].get("message", {}) if isinstance(choices[0], dict) else {} + content = message.get("content") + if content is None: + raise OxloError(f"Unexpected API response: content is None. Message: {message}") + + return content.strip() diff --git a/services/python-tools/tools/auto-readme-generator/requirements-dev.txt b/services/python-tools/tools/auto-readme-generator/requirements-dev.txt new file mode 100644 index 0000000..918f578 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest==8.3.2 +pytest-asyncio==0.23.8 diff --git a/services/python-tools/tools/auto-readme-generator/requirements.txt b/services/python-tools/tools/auto-readme-generator/requirements.txt index d8cb748..46333da 100644 --- a/services/python-tools/tools/auto-readme-generator/requirements.txt +++ b/services/python-tools/tools/auto-readme-generator/requirements.txt @@ -1,6 +1,2 @@ httpx==0.27.0 -fastapi>=0.111.0 -uvicorn>=0.29.0 -pydantic>=2.0.0 -pytest>=8.0.0 -pytest-asyncio>=0.23.0 +pydantic==2.7.1 diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py index 9a6b139..ab55274 100644 --- a/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py +++ b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py @@ -105,6 +105,14 @@ async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048 monkeypatch.setattr(writer, "call_oxlo_chat", fake_call_oxlo_chat) monkeypatch.setattr(refiner, "call_oxlo_chat", fake_call_oxlo_chat) + captured = {} + + async def fake_write_readme(metadata, section_plan): + captured["metadata"] = metadata + return _readme_other() + + monkeypatch.setattr(tool, "write_readme", fake_write_readme) + output = asyncio.run( _collect_stream( { @@ -116,7 +124,8 @@ async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048 ) assert "---RESULT---" in output - assert "[ERROR] Analyzer failed" not in output + assert "## Installation" in output + assert captured["metadata"] == analyzer.DEFAULT_METADATA def test_refiner_max_retries_returns_content(): diff --git a/services/python-tools/tools/auto-readme-generator/tool.py b/services/python-tools/tools/auto-readme-generator/tool.py index 6ec9b7d..c0bf708 100644 --- a/services/python-tools/tools/auto-readme-generator/tool.py +++ b/services/python-tools/tools/auto-readme-generator/tool.py @@ -21,9 +21,9 @@ async def run(data: dict): - project_name = data.get("projectName") or "Unnamed Project" - description = data.get("projectDescription") or "" - tech_stack = data.get("techStack") or "" + project_name = data.get("projectName", "Unnamed Project") + description = data.get("projectDescription", "") + tech_stack = data.get("techStack", "") async def stream(): if not description.strip(): diff --git a/services/python-tools/tools/auto-readme-generator/validator.py b/services/python-tools/tools/auto-readme-generator/validator.py index efb03f4..57d4f74 100644 --- a/services/python-tools/tools/auto-readme-generator/validator.py +++ b/services/python-tools/tools/auto-readme-generator/validator.py @@ -3,6 +3,7 @@ SHIELD_URL_PATTERN = re.compile(r"https?://[^\s\)\]]+") VALID_SHIELD_PREFIX = "https://img.shields.io/" +FENCE_OPEN = re.compile(r"^`{3}(\w*)$") INSTALL_COMMANDS = { "npm": ["npm install", "npm ci"], @@ -16,7 +17,7 @@ def _has_section(content: str, section: str) -> bool: pattern = re.compile( - rf"^#{{1,6}}\s+{re.escape(section)}\s*$", + rf"^#{{1,6}}\s+{re.escape(section)}\s*#*\s*$", re.IGNORECASE | re.MULTILINE, ) return bool(pattern.search(content)) @@ -27,10 +28,11 @@ def _find_fence_issues(content: str) -> list: in_block = False for line in content.splitlines(): - if not line.startswith("```"): + match = FENCE_OPEN.match(line.strip()) + if match is None: continue - fence_lang = line.replace("```", "", 1).strip() + fence_lang = match.group(1) if not in_block: if not fence_lang: issues.append({"type": "no_lang_fence", "detail": "code block missing language"}) @@ -50,7 +52,8 @@ def _extract_code_blocks(content: str) -> list: current = [] for line in content.splitlines(): - if line.startswith("```"): + match = FENCE_OPEN.match(line.strip()) + if match is not None: if in_block: blocks.append("\n".join(current)) current = [] diff --git a/services/python-tools/tools/auto-readme-generator/writer.py b/services/python-tools/tools/auto-readme-generator/writer.py index 07f5b4b..c636d46 100644 --- a/services/python-tools/tools/auto-readme-generator/writer.py +++ b/services/python-tools/tools/auto-readme-generator/writer.py @@ -1,3 +1,5 @@ +import json + from llm_client import call_oxlo_chat WRITER_MODEL = "llama-3.3-70b" @@ -16,6 +18,8 @@ "Installation", } +DEFAULT_SECTIONS = ["Title", "Description", "Installation", "Usage", "License"] + def _sanitize(value: str, max_len: int = 500) -> str: if not isinstance(value, str): @@ -24,7 +28,7 @@ def _sanitize(value: str, max_len: int = 500) -> str: def _sanitize_section_plan(section_plan: list) -> list: - if not section_plan: + if not isinstance(section_plan, list) or not section_plan: return [] cleaned = [] seen = set() @@ -40,20 +44,30 @@ def _sanitize_section_plan(section_plan: list) -> list: async def write_readme(metadata: dict, section_plan: list) -> str: section_plan = _sanitize_section_plan(section_plan) + if not section_plan: + section_plan = DEFAULT_SECTIONS package_manager = _sanitize((metadata or {}).get("package_manager", "unknown")) language = _sanitize((metadata or {}).get("language", "unknown")) framework = _sanitize((metadata or {}).get("framework", "unknown")) entry_point = _sanitize((metadata or {}).get("entry_point", "unknown")) + metadata_block = json.dumps( + { + "package_manager": package_manager, + "language": language, + "framework": framework, + "entry_point": entry_point, + }, + ensure_ascii=False, + ) + system_prompt = ( - "You are a technical writer. Generate a complete README.md in markdown. " + "You are a technical writer. Generate a complete README.md in markdown.\n" "Include ALL of these sections in order: " + ", ".join(section_plan) - + ". " - f"Package manager: {package_manager}. " - f"Language: {language}. " - f"Framework: {framework}. " - f"Entry point: {entry_point}. " + + ".\n" + f"{metadata_block}\n" + "Use the metadata above to pick correct install commands and examples. " "All shields.io badge URLs must start with https://img.shields.io/. " "All code blocks must have a language identifier (```python, ```bash, etc.). " "Return only the README markdown." From 5c00ce83fbb9e5002a2a9e357a65e577b0961eeb Mon Sep 17 00:00:00 2001 From: franco ayala Date: Sun, 17 May 2026 12:17:06 -0400 Subject: [PATCH 4/4] fix: address round 2 OxBot and Shashank review comments on PR #35 - validator.py: relax FENCE_OPEN regex to accept GFM info strings (Shashank) - tool.py: wrap plan_sections and validate_readme in try/except - tool.py: separate refiner and post-refine validator exception handlers - refiner.py: add early return guard when issues is empty - refiner.py: fix Awaitable[str] type hint on call_model - analyzer.py: wrap call_oxlo_chat in try/except with DEFAULT_METADATA fallback - tests: convert asyncio.run to async def with pytest.mark.asyncio - tests: add pytest.ini with asyncio_mode = auto --- .../tools/auto-readme-generator/analyzer.py | 22 ++++---- .../tools/auto-readme-generator/refiner.py | 7 ++- .../auto-readme-generator/tests/pytest.ini | 2 + .../tests/test_pipeline.py | 53 +++++++++---------- .../tests/test_refiner.py | 19 ++++--- .../tools/auto-readme-generator/tool.py | 19 +++++-- .../tools/auto-readme-generator/validator.py | 2 +- 7 files changed, 70 insertions(+), 54 deletions(-) create mode 100644 services/python-tools/tools/auto-readme-generator/tests/pytest.ini diff --git a/services/python-tools/tools/auto-readme-generator/analyzer.py b/services/python-tools/tools/auto-readme-generator/analyzer.py index 53ad9ca..3aeb82d 100644 --- a/services/python-tools/tools/auto-readme-generator/analyzer.py +++ b/services/python-tools/tools/auto-readme-generator/analyzer.py @@ -92,17 +92,19 @@ async def analyze_project(name: str, description: str, tech_stack: str) -> dict: "entry_point, project_type (library|cli|web-api|web-app|other)." ) - raw = await call_oxlo_chat( - ANALYZER_MODEL, - system_prompt, - user_prompt, - max_tokens=512, - temperature=0.2, - ) - - cleaned = _extract_json(raw) - parsed = _parse_json(cleaned) try: + raw = await call_oxlo_chat( + ANALYZER_MODEL, + system_prompt, + user_prompt, + max_tokens=512, + temperature=0.2, + ) + + cleaned = _extract_json(raw) + parsed = _parse_json(cleaned) return ProjectMetadata(**parsed).model_dump() except ValidationError: return DEFAULT_METADATA.copy() + except Exception: + return DEFAULT_METADATA.copy() diff --git a/services/python-tools/tools/auto-readme-generator/refiner.py b/services/python-tools/tools/auto-readme-generator/refiner.py index 7ff7d23..33831a1 100644 --- a/services/python-tools/tools/auto-readme-generator/refiner.py +++ b/services/python-tools/tools/auto-readme-generator/refiner.py @@ -1,4 +1,4 @@ -from typing import Callable, Optional +from typing import Awaitable, Callable, Optional from llm_client import call_oxlo_chat from validator import validate_readme @@ -17,8 +17,11 @@ async def refine_readme( issues: list, metadata: dict, section_plan: list, - call_model: Optional[Callable[[str, str, str, int, float], str]] = None, + call_model: Optional[Callable[[str, str, str, int, float], Awaitable[str]]] = None, ) -> str: + if not issues: + return content + call_model = call_model or call_oxlo_chat for attempt in range(2): diff --git a/services/python-tools/tools/auto-readme-generator/tests/pytest.ini b/services/python-tools/tools/auto-readme-generator/tests/pytest.ini new file mode 100644 index 0000000..2f4c80e --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_mode = auto diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py index ab55274..de9a0f0 100644 --- a/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py +++ b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py @@ -1,4 +1,4 @@ -import asyncio +import pytest import analyzer import refiner @@ -63,7 +63,8 @@ def _readme_other() -> str: ) -def test_pipeline_happy_path(monkeypatch): +@pytest.mark.asyncio +async def test_pipeline_happy_path(monkeypatch): async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): if "project analyzer" in system_prompt.lower(): return ( @@ -81,21 +82,20 @@ async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048 monkeypatch.setattr(writer, "call_oxlo_chat", fake_call_oxlo_chat) monkeypatch.setattr(refiner, "call_oxlo_chat", fake_call_oxlo_chat) - output = asyncio.run( - _collect_stream( - { - "projectName": "fastapi-auth", - "projectDescription": "Auth API with FastAPI", - "techStack": "Python, FastAPI", - } - ) + output = await _collect_stream( + { + "projectName": "fastapi-auth", + "projectDescription": "Auth API with FastAPI", + "techStack": "Python, FastAPI", + } ) assert "---RESULT---" in output assert "[ERROR]" not in output -def test_pipeline_handles_malformed_analyzer_response(monkeypatch): +@pytest.mark.asyncio +async def test_pipeline_handles_malformed_analyzer_response(monkeypatch): async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): if "project analyzer" in system_prompt.lower(): return "not json" @@ -113,14 +113,12 @@ async def fake_write_readme(metadata, section_plan): monkeypatch.setattr(tool, "write_readme", fake_write_readme) - output = asyncio.run( - _collect_stream( - { - "projectName": "unknown", - "projectDescription": "Test", - "techStack": "", - } - ) + output = await _collect_stream( + { + "projectName": "unknown", + "projectDescription": "Test", + "techStack": "", + } ) assert "---RESULT---" in output @@ -128,7 +126,8 @@ async def fake_write_readme(metadata, section_plan): assert captured["metadata"] == analyzer.DEFAULT_METADATA -def test_refiner_max_retries_returns_content(): +@pytest.mark.asyncio +async def test_refiner_max_retries_returns_content(): async def always_bad(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): return "# Title\n```python\nprint('hi')\n" @@ -137,14 +136,12 @@ async def always_bad(model, system_prompt, user_prompt, max_tokens=2048, tempera section_plan = ["Title"] issues = validate_readme(content, section_plan, metadata) - result = asyncio.run( - refiner.refine_readme( - content, - issues, - metadata, - section_plan, - call_model=always_bad, - ) + result = await refiner.refine_readme( + content, + issues, + metadata, + section_plan, + call_model=always_bad, ) assert result diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py b/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py index 976dd9b..a6a4a77 100644 --- a/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py +++ b/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py @@ -1,4 +1,4 @@ -import asyncio +import pytest from refiner import refine_readme from validator import validate_readme @@ -14,20 +14,19 @@ async def _fake_call_model(model, system, user, max_tokens, temperature): ) -def test_refiner_resolves_issues(): +@pytest.mark.asyncio +async def test_refiner_resolves_issues(): content = "# Title\n# License\n```\nraw code\n```\n" metadata = {"package_manager": "npm"} section_plan = ["Title", "License"] issues = validate_readme(content, section_plan, metadata) - result = asyncio.run( - refine_readme( - content, - issues, - metadata, - section_plan, - call_model=_fake_call_model, - ) + result = await refine_readme( + content, + issues, + metadata, + section_plan, + call_model=_fake_call_model, ) assert validate_readme(result, section_plan, metadata) == [] diff --git a/services/python-tools/tools/auto-readme-generator/tool.py b/services/python-tools/tools/auto-readme-generator/tool.py index c0bf708..0b59594 100644 --- a/services/python-tools/tools/auto-readme-generator/tool.py +++ b/services/python-tools/tools/auto-readme-generator/tool.py @@ -38,7 +38,11 @@ async def stream(): return yield "[2/5] Planning required sections...\n" - section_plan = plan_sections(metadata) + try: + section_plan = plan_sections(metadata) + except Exception as exc: + yield f"[ERROR] Planner failed: {exc}\n" + return yield "[3/5] Writing README...\n" try: @@ -48,7 +52,11 @@ async def stream(): return yield "[4/5] Validating sections and badges...\n" - issues = validate_readme(readme_content, section_plan, metadata) + try: + issues = validate_readme(readme_content, section_plan, metadata) + except Exception as exc: + yield f"[WARN] Validator failed, returning unvalidated result: {exc}\n" + issues = [] if issues: yield f"[5/5] Found {len(issues)} issue(s), refining...\n" @@ -59,9 +67,14 @@ async def stream(): metadata, section_plan, ) - issues = validate_readme(readme_content, section_plan, metadata) except Exception as exc: yield f"[WARN] Refiner failed, returning best effort: {exc}\n" + + try: + issues = validate_readme(readme_content, section_plan, metadata) + except Exception as exc: + yield f"[WARN] Post-refine validation failed: {exc}\n" + issues = [] else: yield "[5/5] Validation passed.\n" diff --git a/services/python-tools/tools/auto-readme-generator/validator.py b/services/python-tools/tools/auto-readme-generator/validator.py index 57d4f74..4b3bd7c 100644 --- a/services/python-tools/tools/auto-readme-generator/validator.py +++ b/services/python-tools/tools/auto-readme-generator/validator.py @@ -3,7 +3,7 @@ SHIELD_URL_PATTERN = re.compile(r"https?://[^\s\)\]]+") VALID_SHIELD_PREFIX = "https://img.shields.io/" -FENCE_OPEN = re.compile(r"^`{3}(\w*)$") +FENCE_OPEN = re.compile(r"^`{3}(\w*)(?:\s.*)?$") INSTALL_COMMANDS = { "npm": ["npm install", "npm ci"],