diff --git a/.github/workflows/ci-auto-readme-generator.yml b/.github/workflows/ci-auto-readme-generator.yml new file mode 100644 index 0000000..8afb738 --- /dev/null +++ b/.github/workflows/ci-auto-readme-generator.yml @@ -0,0 +1,24 @@ +name: CI - auto-readme-generator + +on: + push: + paths: + - "services/python-tools/tools/auto-readme-generator/**" + pull_request: + paths: + - "services/python-tools/tools/auto-readme-generator/**" + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install deps + run: pip install -r services/python-tools/tools/auto-readme-generator/requirements.txt -r services/python-tools/tools/auto-readme-generator/requirements-dev.txt + - name: Run tests + run: | + cd services/python-tools/tools/auto-readme-generator + pytest tests/ -v diff --git a/app/src/lib/tools/auto-readme-generator.ts b/app/src/lib/tools/auto-readme-generator.ts new file mode 100644 index 0000000..ee91880 --- /dev/null +++ b/app/src/lib/tools/auto-readme-generator.ts @@ -0,0 +1,42 @@ +import type { ToolDefinition } from "@/types"; + +export const autoReadmeGeneratorTool: ToolDefinition = { + id: "auto-readme-generator", + name: "Auto README Generator", + description: "Multi-agent pipeline that generates structured, validated READMEs.", + category: "documentation", + icon: "FileText", + status: "active", + outputFormat: "streaming-text", + + // Tier 2: runs in the unified Python tool runner + tier: "tier2", + + requiredFields: ["projectName", "projectDescription"], + defaultModel: "llama-3.3-70b", + buildSystemPrompt: () => "", + buildUserPrompt: ({ projectName, projectDescription, techStack }) => + JSON.stringify({ projectName, projectDescription, techStack }), + + inputs: [ + { + key: "projectName", + label: "Project Name", + type: "text", + placeholder: "e.g. my-fastapi-app", + }, + { + key: "projectDescription", + label: "Project Description", + type: "textarea", + placeholder: "What does your project do? Main features, target users.", + rows: 5, + }, + { + key: "techStack", + label: "Tech Stack (optional)", + type: "text", + placeholder: "e.g. Python, FastAPI, PostgreSQL, Docker", + }, + ], +}; diff --git a/app/src/lib/tools/registry.ts b/app/src/lib/tools/registry.ts index 63fd80b..e4db3be 100644 --- a/app/src/lib/tools/registry.ts +++ b/app/src/lib/tools/registry.ts @@ -3,6 +3,7 @@ import type { CategoryInfo, ToolCategory, ToolDefinition } from "@/types"; import { apiChangeAnalyzer } from "./api-change-analyzer"; import { apiValidator } from "./api-validator"; import { architectureDiagram } from "./architecture-diagram"; +import { autoReadmeGeneratorTool } from "./auto-readme-generator"; import { bugReplayer } from "./bug-replayer"; import { captionGenerator } from "./caption-generator"; // --- Core tool definitions --- @@ -114,6 +115,7 @@ export const tools: ToolDefinition[] = [ captionGenerator, seoWriter, deepResearch, // Tier 2: LangGraph multi-agent Python service + autoReadmeGeneratorTool, ]; // --------------------------------------------------------------------------- diff --git a/app/src/types/index.ts b/app/src/types/index.ts index 3db21f9..5ba04ee 100644 --- a/app/src/types/index.ts +++ b/app/src/types/index.ts @@ -32,6 +32,8 @@ export interface ToolDefinition { icon: string; /** Active or placeholder */ status: ToolStatus; + /** Output format hint for frontend rendering (e.g. "streaming-text") */ + outputFormat?: string; // --- Tier config --- /** diff --git a/services/python-tools/tools/auto-readme-generator/analyzer.py b/services/python-tools/tools/auto-readme-generator/analyzer.py new file mode 100644 index 0000000..3aeb82d --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/analyzer.py @@ -0,0 +1,110 @@ +import json +import re +from typing import Literal + +from pydantic import BaseModel, ValidationError + +from llm_client import call_oxlo_chat + +ANALYZER_MODEL = "deepseek-v3.2" + +DEFAULT_METADATA = { + "language": "unknown", + "package_manager": "unknown", + "framework": "unknown", + "entry_point": "unknown", + "project_type": "other", +} + + +class ProjectMetadata(BaseModel): + language: str = "unknown" + package_manager: str = "unknown" + framework: str = "unknown" + entry_point: str = "unknown" + project_type: Literal["library", "cli", "web-api", "web-app", "other"] = "other" + + +def _sanitize(value: str, max_len: int = 500) -> str: + if not isinstance(value, str): + return "" + return value.strip()[:max_len] + + +def _extract_json(text: str) -> str: + text = text.strip() + if text.startswith("```"): + text = re.sub(r"^```[a-zA-Z0-9_-]*", "", text) + text = re.sub(r"```$", "", text.strip()) + return text + + +def _parse_json(text: str) -> dict: + try: + parsed = json.loads(text) + if isinstance(parsed, dict): + return parsed + if isinstance(parsed, list): + for item in parsed: + if isinstance(item, dict): + return item + except json.JSONDecodeError: + pass + + for pattern in (r"\{.*\}", r"\[.*\]"): + match = re.search(pattern, text, re.DOTALL) + if not match: + continue + try: + parsed = json.loads(match.group(0)) + except json.JSONDecodeError: + continue + if isinstance(parsed, dict): + return parsed + if isinstance(parsed, list): + for item in parsed: + if isinstance(item, dict): + return item + + return DEFAULT_METADATA.copy() + + +async def analyze_project(name: str, description: str, tech_stack: str) -> dict: + safe_name = _sanitize(name) + safe_description = _sanitize(description, max_len=2000) + safe_tech_stack = _sanitize(tech_stack) + + system_prompt = ( + "You are a project analyzer. " + "Respond ONLY with a JSON object. No markdown, no explanation." + ) + project_data = json.dumps( + { + "name": safe_name, + "description": safe_description, + "tech_stack": safe_tech_stack, + }, + ensure_ascii=False, + ) + user_prompt = ( + f"Project data (JSON):\n{project_data}\n\n" + "Return JSON with keys: language, package_manager, framework, " + "entry_point, project_type (library|cli|web-api|web-app|other)." + ) + + try: + raw = await call_oxlo_chat( + ANALYZER_MODEL, + system_prompt, + user_prompt, + max_tokens=512, + temperature=0.2, + ) + + cleaned = _extract_json(raw) + parsed = _parse_json(cleaned) + return ProjectMetadata(**parsed).model_dump() + except ValidationError: + return DEFAULT_METADATA.copy() + except Exception: + return DEFAULT_METADATA.copy() diff --git a/services/python-tools/tools/auto-readme-generator/llm_client.py b/services/python-tools/tools/auto-readme-generator/llm_client.py new file mode 100644 index 0000000..a5efa1d --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/llm_client.py @@ -0,0 +1,59 @@ +import os + +import httpx + +OXLO_BASE_URL = os.getenv("OXLO_BASE_URL", "https://api.oxlo.ai/v1") +OXLO_API_KEY = os.getenv("OXLO_API_KEY", "") +_CLIENT: httpx.AsyncClient | None = None + + +class OxloError(RuntimeError): + pass + + +def _get_client() -> httpx.AsyncClient: + global _CLIENT + if _CLIENT is None: + _CLIENT = httpx.AsyncClient() + return _CLIENT + + +async def call_oxlo_chat( + model: str, + system_prompt: str, + user_prompt: str, + max_tokens: int = 2048, + temperature: float = 0.3, +) -> str: + if not OXLO_API_KEY: + raise OxloError("OXLO_API_KEY not configured") + + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "temperature": temperature, + "max_tokens": max_tokens, + } + + client = _get_client() + resp = await client.post( + f"{OXLO_BASE_URL}/chat/completions", + headers={"Authorization": f"Bearer {OXLO_API_KEY}"}, + json=payload, + timeout=30, + ) + resp.raise_for_status() + data = resp.json() + choices = data.get("choices") + if not choices or not isinstance(choices, list): + raise OxloError(f"Unexpected API response: no choices returned. Response: {data}") + + message = choices[0].get("message", {}) if isinstance(choices[0], dict) else {} + content = message.get("content") + if content is None: + raise OxloError(f"Unexpected API response: content is None. Message: {message}") + + return content.strip() diff --git a/services/python-tools/tools/auto-readme-generator/planner.py b/services/python-tools/tools/auto-readme-generator/planner.py new file mode 100644 index 0000000..81fb5ff --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/planner.py @@ -0,0 +1,59 @@ +SECTION_PLANS = { + "web-api": [ + "Title", + "Badges", + "Description", + "Features", + "Quick Start", + "Usage", + "API Reference", + "Config", + "Contributing", + "License", + ], + "cli": [ + "Title", + "Badges", + "Description", + "Features", + "Installation", + "Usage", + "Config", + "Contributing", + "License", + ], + "library": [ + "Title", + "Badges", + "Description", + "Installation", + "Usage", + "API Reference", + "Contributing", + "License", + ], + "web-app": [ + "Title", + "Badges", + "Description", + "Features", + "Quick Start", + "Usage", + "Config", + "Contributing", + "License", + ], + "other": [ + "Title", + "Description", + "Installation", + "Usage", + "Contributing", + "License", + ], +} + + +def plan_sections(metadata: dict) -> list: + project_type = (metadata or {}).get("project_type", "other") + return SECTION_PLANS.get(project_type, SECTION_PLANS["other"]) diff --git a/services/python-tools/tools/auto-readme-generator/refiner.py b/services/python-tools/tools/auto-readme-generator/refiner.py new file mode 100644 index 0000000..33831a1 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/refiner.py @@ -0,0 +1,56 @@ +from typing import Awaitable, Callable, Optional + +from llm_client import call_oxlo_chat +from validator import validate_readme + +REFINER_MODEL = "llama-3.3-70b" + + +def _sanitize(value: str, max_len: int = 12000) -> str: + if not isinstance(value, str): + return "" + return value.strip()[:max_len] + + +async def refine_readme( + content: str, + issues: list, + metadata: dict, + section_plan: list, + call_model: Optional[Callable[[str, str, str, int, float], Awaitable[str]]] = None, +) -> str: + if not issues: + return content + + call_model = call_model or call_oxlo_chat + + for attempt in range(2): + issue_summary = "\n".join( + f"- [{item['type']}] {item['detail']}" for item in issues + ) + safe_issue_summary = _sanitize(issue_summary, max_len=2000) + safe_content = _sanitize(content, max_len=20000) + system_prompt = ( + "You are a technical writer. Fix the README to resolve the listed issues. " + "Return the full corrected README only." + ) + user_prompt = ( + f"Issues to fix:\n{safe_issue_summary}\n\n" + f"README:\n{safe_content}\n\n" + "Ensure all required sections are present, badges use https://img.shields.io/, " + "and code fences include a language tag." + ) + + content = await call_model( + REFINER_MODEL, + system_prompt, + user_prompt, + 4096, + 0.2, + ) + + issues = validate_readme(content, section_plan, metadata) + if not issues: + break + + return content diff --git a/services/python-tools/tools/auto-readme-generator/requirements-dev.txt b/services/python-tools/tools/auto-readme-generator/requirements-dev.txt new file mode 100644 index 0000000..918f578 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest==8.3.2 +pytest-asyncio==0.23.8 diff --git a/services/python-tools/tools/auto-readme-generator/requirements.txt b/services/python-tools/tools/auto-readme-generator/requirements.txt new file mode 100644 index 0000000..46333da --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/requirements.txt @@ -0,0 +1,2 @@ +httpx==0.27.0 +pydantic==2.7.1 diff --git a/services/python-tools/tools/auto-readme-generator/tests/conftest.py b/services/python-tools/tools/auto-readme-generator/tests/conftest.py new file mode 100644 index 0000000..62fa24b --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/conftest.py @@ -0,0 +1,6 @@ +import pathlib +import sys + +TOOL_DIR = pathlib.Path(__file__).resolve().parents[1] +if str(TOOL_DIR) not in sys.path: + sys.path.insert(0, str(TOOL_DIR)) diff --git a/services/python-tools/tools/auto-readme-generator/tests/pytest.ini b/services/python-tools/tools/auto-readme-generator/tests/pytest.ini new file mode 100644 index 0000000..2f4c80e --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_mode = auto diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py new file mode 100644 index 0000000..de9a0f0 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_pipeline.py @@ -0,0 +1,148 @@ +import pytest + +import analyzer +import refiner +import tool +import writer +from validator import validate_readme + + +async def _collect_stream(data: dict) -> str: + stream = await tool.run(data) + chunks = [] + async for chunk in stream: + chunks.append(chunk) + return "".join(chunks) + + +def _readme_web_api() -> str: + return ( + "# Title\n" + "## Badges\n" + "![build](https://img.shields.io/badge/build-passing-green)\n" + "## Description\n" + "API service for auth.\n" + "## Features\n" + "- Login\n" + "## Quick Start\n" + "```bash\n" + "pip install example\n" + "```\n" + "## Usage\n" + "```bash\n" + "python -m example\n" + "```\n" + "## API Reference\n" + "See docs.\n" + "## Config\n" + "ENV vars.\n" + "## Contributing\n" + "PRs welcome.\n" + "## License\n" + "MIT\n" + ) + + +def _readme_other() -> str: + return ( + "# Title\n" + "## Description\n" + "Simple tool.\n" + "## Installation\n" + "```bash\n" + "pip install example\n" + "```\n" + "## Usage\n" + "```bash\n" + "example --help\n" + "```\n" + "## Contributing\n" + "PRs welcome.\n" + "## License\n" + "MIT\n" + ) + + +@pytest.mark.asyncio +async def test_pipeline_happy_path(monkeypatch): + async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): + if "project analyzer" in system_prompt.lower(): + return ( + "{" + "\"language\": \"python\"," + "\"package_manager\": \"pip\"," + "\"framework\": \"fastapi\"," + "\"entry_point\": \"main.py\"," + "\"project_type\": \"web-api\"" + "}" + ) + return _readme_web_api() + + monkeypatch.setattr(analyzer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(writer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(refiner, "call_oxlo_chat", fake_call_oxlo_chat) + + output = await _collect_stream( + { + "projectName": "fastapi-auth", + "projectDescription": "Auth API with FastAPI", + "techStack": "Python, FastAPI", + } + ) + + assert "---RESULT---" in output + assert "[ERROR]" not in output + + +@pytest.mark.asyncio +async def test_pipeline_handles_malformed_analyzer_response(monkeypatch): + async def fake_call_oxlo_chat(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): + if "project analyzer" in system_prompt.lower(): + return "not json" + return _readme_other() + + monkeypatch.setattr(analyzer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(writer, "call_oxlo_chat", fake_call_oxlo_chat) + monkeypatch.setattr(refiner, "call_oxlo_chat", fake_call_oxlo_chat) + + captured = {} + + async def fake_write_readme(metadata, section_plan): + captured["metadata"] = metadata + return _readme_other() + + monkeypatch.setattr(tool, "write_readme", fake_write_readme) + + output = await _collect_stream( + { + "projectName": "unknown", + "projectDescription": "Test", + "techStack": "", + } + ) + + assert "---RESULT---" in output + assert "## Installation" in output + assert captured["metadata"] == analyzer.DEFAULT_METADATA + + +@pytest.mark.asyncio +async def test_refiner_max_retries_returns_content(): + async def always_bad(model, system_prompt, user_prompt, max_tokens=2048, temperature=0.3): + return "# Title\n```python\nprint('hi')\n" + + content = "# Title\n```python\nprint('hi')\n" + metadata = {"package_manager": "npm"} + section_plan = ["Title"] + issues = validate_readme(content, section_plan, metadata) + + result = await refiner.refine_readme( + content, + issues, + metadata, + section_plan, + call_model=always_bad, + ) + + assert result + assert validate_readme(result, section_plan, metadata) diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_planner.py b/services/python-tools/tools/auto-readme-generator/tests/test_planner.py new file mode 100644 index 0000000..203e4c9 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_planner.py @@ -0,0 +1,16 @@ +from planner import plan_sections + + +def test_web_api_has_api_reference(): + plan = plan_sections({"project_type": "web-api"}) + assert "API Reference" in plan + + +def test_cli_has_installation(): + plan = plan_sections({"project_type": "cli"}) + assert "Installation" in plan + + +def test_unknown_type_falls_back_to_other(): + plan = plan_sections({"project_type": "xyz"}) + assert "Usage" in plan diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py b/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py new file mode 100644 index 0000000..a6a4a77 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_refiner.py @@ -0,0 +1,32 @@ +import pytest + +from refiner import refine_readme +from validator import validate_readme + + +async def _fake_call_model(model, system, user, max_tokens, temperature): + return ( + "# Title\n" + "![build](https://img.shields.io/badge/build-passing-green)\n" + "# License\n" + "```python\nprint('hi')\n```\n" + "npm install\n" + ) + + +@pytest.mark.asyncio +async def test_refiner_resolves_issues(): + content = "# Title\n# License\n```\nraw code\n```\n" + metadata = {"package_manager": "npm"} + section_plan = ["Title", "License"] + issues = validate_readme(content, section_plan, metadata) + + result = await refine_readme( + content, + issues, + metadata, + section_plan, + call_model=_fake_call_model, + ) + + assert validate_readme(result, section_plan, metadata) == [] diff --git a/services/python-tools/tools/auto-readme-generator/tests/test_validator.py b/services/python-tools/tools/auto-readme-generator/tests/test_validator.py new file mode 100644 index 0000000..2907ac9 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tests/test_validator.py @@ -0,0 +1,35 @@ +from validator import validate_readme + + +def test_detects_missing_section(): + issues = validate_readme("# Title\n# Usage\n", ["Title", "Usage", "License"]) + assert any(item["type"] == "missing_section" for item in issues) + + +def test_detects_bad_badge(): + content = "![b](http://shields.io/badge/foo-bar)\n# Title\n# License\n" + issues = validate_readme(content, ["Title", "License"]) + assert any(item["type"] == "bad_badge" for item in issues) + + +def test_detects_fence_without_language(): + content = "# Title\n# License\n```\nsome code\n```\n" + issues = validate_readme(content, ["Title", "License"]) + assert any(item["type"] == "no_lang_fence" for item in issues) + + +def test_detects_missing_install_command_for_npm(): + content = "# Title\n# Installation\n# Usage\n" + issues = validate_readme(content, ["Title", "Installation", "Usage"], {"package_manager": "npm"}) + assert any(item["type"] == "install_command_mismatch" for item in issues) + + +def test_passes_valid_readme(): + content = ( + "# Title\n" + "![build](https://img.shields.io/badge/build-passing-green)\n" + "# License\n" + "```python\nprint('hi')\n```\n" + "npm install\n" + ) + assert validate_readme(content, ["Title", "License"], {"package_manager": "npm"}) == [] diff --git a/services/python-tools/tools/auto-readme-generator/tool.py b/services/python-tools/tools/auto-readme-generator/tool.py new file mode 100644 index 0000000..0b59594 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/tool.py @@ -0,0 +1,87 @@ +""" +Auto README Generator — Tool Entry Point +========================================= +Multi-agent pipeline: analyze, plan, write, validate, refine. +""" + +from analyzer import analyze_project +from planner import plan_sections +from writer import write_readme +from validator import validate_readme +from refiner import refine_readme + + +MANIFEST = { + "id": "auto-readme-generator", + "name": "Auto README Generator", + "description": "Multi-agent pipeline that generates structured, validated READMEs.", + "author": "Franco Ayala", + "version": "1.0.0", +} + + +async def run(data: dict): + project_name = data.get("projectName", "Unnamed Project") + description = data.get("projectDescription", "") + tech_stack = data.get("techStack", "") + + async def stream(): + if not description.strip(): + yield "[ERROR] Project description is required.\n" + return + + yield "[1/5] Analyzing project metadata...\n" + try: + metadata = await analyze_project(project_name, description, tech_stack) + except Exception as exc: + yield f"[ERROR] Analyzer failed: {exc}\n" + return + + yield "[2/5] Planning required sections...\n" + try: + section_plan = plan_sections(metadata) + except Exception as exc: + yield f"[ERROR] Planner failed: {exc}\n" + return + + yield "[3/5] Writing README...\n" + try: + readme_content = await write_readme(metadata, section_plan) + except Exception as exc: + yield f"[ERROR] Writer failed: {exc}\n" + return + + yield "[4/5] Validating sections and badges...\n" + try: + issues = validate_readme(readme_content, section_plan, metadata) + except Exception as exc: + yield f"[WARN] Validator failed, returning unvalidated result: {exc}\n" + issues = [] + + if issues: + yield f"[5/5] Found {len(issues)} issue(s), refining...\n" + try: + readme_content = await refine_readme( + readme_content, + issues, + metadata, + section_plan, + ) + except Exception as exc: + yield f"[WARN] Refiner failed, returning best effort: {exc}\n" + + try: + issues = validate_readme(readme_content, section_plan, metadata) + except Exception as exc: + yield f"[WARN] Post-refine validation failed: {exc}\n" + issues = [] + else: + yield "[5/5] Validation passed.\n" + + if issues: + yield f"[WARN] Validation still has {len(issues)} issue(s).\n" + + yield "\n---RESULT---\n" + yield readme_content + + return stream() diff --git a/services/python-tools/tools/auto-readme-generator/validator.py b/services/python-tools/tools/auto-readme-generator/validator.py new file mode 100644 index 0000000..4b3bd7c --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/validator.py @@ -0,0 +1,111 @@ +import re +from typing import Optional + +SHIELD_URL_PATTERN = re.compile(r"https?://[^\s\)\]]+") +VALID_SHIELD_PREFIX = "https://img.shields.io/" +FENCE_OPEN = re.compile(r"^`{3}(\w*)(?:\s.*)?$") + +INSTALL_COMMANDS = { + "npm": ["npm install", "npm ci"], + "yarn": ["yarn install", "yarn add"], + "pnpm": ["pnpm install", "pnpm add"], + "pip": ["pip install"], + "pipenv": ["pipenv install"], + "poetry": ["poetry install"], +} + + +def _has_section(content: str, section: str) -> bool: + pattern = re.compile( + rf"^#{{1,6}}\s+{re.escape(section)}\s*#*\s*$", + re.IGNORECASE | re.MULTILINE, + ) + return bool(pattern.search(content)) + + +def _find_fence_issues(content: str) -> list: + issues = [] + in_block = False + + for line in content.splitlines(): + match = FENCE_OPEN.match(line.strip()) + if match is None: + continue + + fence_lang = match.group(1) + if not in_block: + if not fence_lang: + issues.append({"type": "no_lang_fence", "detail": "code block missing language"}) + in_block = True + else: + in_block = False + + if in_block: + issues.append({"type": "unclosed_fence", "detail": "code block not closed"}) + + return issues + + +def _extract_code_blocks(content: str) -> list: + blocks = [] + in_block = False + current = [] + + for line in content.splitlines(): + match = FENCE_OPEN.match(line.strip()) + if match is not None: + if in_block: + blocks.append("\n".join(current)) + current = [] + in_block = False + else: + in_block = True + continue + + if in_block: + current.append(line) + + return blocks + + +def _has_install_command(content: str, package_manager: str) -> bool: + options = INSTALL_COMMANDS.get(package_manager.lower(), []) + if not options: + return False + + code_blocks = _extract_code_blocks(content) + for block in code_blocks: + if any(cmd in block for cmd in options): + return True + + for cmd in options: + pattern = re.compile(rf"(?m)^[\t >`]*{re.escape(cmd)}\b") + if pattern.search(content): + return True + + return False + + +def validate_readme(content: str, section_plan: list, metadata: Optional[dict] = None) -> list: + issues = [] + + for section in section_plan: + if not _has_section(content, section): + issues.append({"type": "missing_section", "detail": section}) + + for url in SHIELD_URL_PATTERN.findall(content): + if "shields.io" in url and not url.startswith(VALID_SHIELD_PREFIX): + issues.append({"type": "bad_badge", "detail": url}) + + issues.extend(_find_fence_issues(content)) + + if metadata: + package_manager = metadata.get("package_manager", "").lower() + if package_manager and package_manager in INSTALL_COMMANDS: + if not _has_install_command(content, package_manager): + issues.append({ + "type": "install_command_mismatch", + "detail": f"missing install command for {package_manager}", + }) + + return issues diff --git a/services/python-tools/tools/auto-readme-generator/writer.py b/services/python-tools/tools/auto-readme-generator/writer.py new file mode 100644 index 0000000..c636d46 --- /dev/null +++ b/services/python-tools/tools/auto-readme-generator/writer.py @@ -0,0 +1,87 @@ +import json + +from llm_client import call_oxlo_chat + +WRITER_MODEL = "llama-3.3-70b" + +ALLOWED_SECTIONS = { + "Title", + "Badges", + "Description", + "Features", + "Quick Start", + "Usage", + "API Reference", + "Config", + "Contributing", + "License", + "Installation", +} + +DEFAULT_SECTIONS = ["Title", "Description", "Installation", "Usage", "License"] + + +def _sanitize(value: str, max_len: int = 500) -> str: + if not isinstance(value, str): + return "" + return value.strip()[:max_len] + + +def _sanitize_section_plan(section_plan: list) -> list: + if not isinstance(section_plan, list) or not section_plan: + return [] + cleaned = [] + seen = set() + for section in section_plan: + if not isinstance(section, str): + continue + section = section.strip() + if section in ALLOWED_SECTIONS and section not in seen: + cleaned.append(section) + seen.add(section) + return cleaned + + +async def write_readme(metadata: dict, section_plan: list) -> str: + section_plan = _sanitize_section_plan(section_plan) + if not section_plan: + section_plan = DEFAULT_SECTIONS + package_manager = _sanitize((metadata or {}).get("package_manager", "unknown")) + language = _sanitize((metadata or {}).get("language", "unknown")) + framework = _sanitize((metadata or {}).get("framework", "unknown")) + entry_point = _sanitize((metadata or {}).get("entry_point", "unknown")) + + metadata_block = json.dumps( + { + "package_manager": package_manager, + "language": language, + "framework": framework, + "entry_point": entry_point, + }, + ensure_ascii=False, + ) + + system_prompt = ( + "You are a technical writer. Generate a complete README.md in markdown.\n" + "Include ALL of these sections in order: " + + ", ".join(section_plan) + + ".\n" + f"{metadata_block}\n" + "Use the metadata above to pick correct install commands and examples. " + "All shields.io badge URLs must start with https://img.shields.io/. " + "All code blocks must have a language identifier (```python, ```bash, etc.). " + "Return only the README markdown." + ) + + user_prompt = ( + "Write the README now. Use the metadata above to pick correct install " + "commands and examples. Ensure sections are present and properly titled." + ) + + return await call_oxlo_chat( + WRITER_MODEL, + system_prompt, + user_prompt, + max_tokens=4096, + temperature=0.3, + )