diff --git a/okf/src/reference_agent/__init__.py b/okf/src/reference_agent/__init__.py index 3dc1f76..223f3d8 100644 --- a/okf/src/reference_agent/__init__.py +++ b/okf/src/reference_agent/__init__.py @@ -1 +1,6 @@ __version__ = "0.1.0" + +# Default Gemini model used by the agents and the index synthesizer. +# Defined here (a dependency-free leaf module) so it has a single source of +# truth without creating an import cycle between agent.py and bundle.index. +DEFAULT_MODEL = "gemini-flash-latest" diff --git a/okf/src/reference_agent/agent.py b/okf/src/reference_agent/agent.py index 1201ae0..00b574d 100644 --- a/okf/src/reference_agent/agent.py +++ b/okf/src/reference_agent/agent.py @@ -13,7 +13,7 @@ ) from reference_agent.tools.web_tools import fetch_url -DEFAULT_MODEL = "gemini-flash-latest" +from reference_agent import DEFAULT_MODEL def _load_prompt(filename: str) -> str: diff --git a/okf/src/reference_agent/bundle/document.py b/okf/src/reference_agent/bundle/document.py index f0280b9..1e22308 100644 --- a/okf/src/reference_agent/bundle/document.py +++ b/okf/src/reference_agent/bundle/document.py @@ -5,6 +5,11 @@ import yaml +# Keys this agent requires before it will write a document. Note this is +# stricter than OKF conformance: the SPEC (§4.1, §9) requires only `type` +# and treats title/description/timestamp as recommended. We enforce all +# four as a quality bar for *generated* documents — hand-authored bundles +# with only `type` are still SPEC-conformant and accepted by consumers. REQUIRED_FRONTMATTER_KEYS = ("type", "title", "description", "timestamp") _FRONTMATTER_DELIM = "---" diff --git a/okf/src/reference_agent/bundle/index.py b/okf/src/reference_agent/bundle/index.py index f082975..25d1e3b 100644 --- a/okf/src/reference_agent/bundle/index.py +++ b/okf/src/reference_agent/bundle/index.py @@ -4,11 +4,16 @@ from pathlib import Path from typing import Callable +from reference_agent import DEFAULT_MODEL from reference_agent.bundle.document import OKFDocument from reference_agent.bundle.synthesizer import synthesize_description _INDEX_FILE = "index.md" -_FALLBACK_MODEL = "gemini-flash-latest" + +# SPEC §11: bundles MAY declare the OKF version they target via +# `okf_version` in the bundle-root index.md frontmatter — the only place +# frontmatter is permitted in an index file. +_OKF_VERSION = "0.1" def _load_doc(path: Path) -> OKFDocument | None: @@ -34,6 +39,12 @@ def _build_index_text(entries: list[tuple[str, str, str, str]]) -> str: return "\n\n".join(sections) + "\n" +def _with_root_frontmatter(body: str) -> str: + return OKFDocument( + frontmatter={"okf_version": _OKF_VERSION}, body=body + ).serialize() + + def _directories_to_index(bundle_root: Path) -> list[Path]: dirs: set[Path] = set() for md in bundle_root.rglob("*.md"): @@ -49,7 +60,7 @@ def _directories_to_index(bundle_root: Path) -> list[Path]: def regenerate_indexes( bundle_root: Path, *, - model: str = _FALLBACK_MODEL, + model: str = DEFAULT_MODEL, synthesize: Callable[..., str] = synthesize_description, ) -> list[Path]: bundle_root = Path(bundle_root) @@ -86,8 +97,11 @@ def regenerate_indexes( if not entries: continue + index_text = _build_index_text(entries) + if directory == bundle_root: + index_text = _with_root_frontmatter(index_text) index_path = directory / _INDEX_FILE - index_path.write_text(_build_index_text(entries), encoding="utf-8") + index_path.write_text(index_text, encoding="utf-8") written.append(index_path) if directory == bundle_root: diff --git a/okf/src/reference_agent/prompts/reference_instruction.md b/okf/src/reference_agent/prompts/reference_instruction.md index c4adfc3..dc64777 100644 --- a/okf/src/reference_agent/prompts/reference_instruction.md +++ b/okf/src/reference_agent/prompts/reference_instruction.md @@ -15,7 +15,7 @@ concept and finishes by calling `write_concept_doc` exactly once. 5. Compose an OKF document and call `write_concept_doc(concept_id, frontmatter, body)` exactly once. Do not call any tools after that. -## Frontmatter (YAML, required keys) +## Frontmatter (YAML — always include all of these) - `type`: the concept type, exactly as returned in the concept ref (e.g. `BigQuery Table`, `BigQuery Dataset`). diff --git a/okf/src/reference_agent/web/fetcher.py b/okf/src/reference_agent/web/fetcher.py index e9f43cd..2bc7036 100644 --- a/okf/src/reference_agent/web/fetcher.py +++ b/okf/src/reference_agent/web/fetcher.py @@ -7,7 +7,7 @@ from markdownify import markdownify -_USER_AGENT = "okf-reference-agent/0.1 (+https://github.com/amirhormati/open-knowledge-format)" +_USER_AGENT = "okf-reference-agent/0.1 (+https://github.com/GoogleCloudPlatform/knowledge-catalog)" _MAX_MARKDOWN_BYTES = 40 * 1024 _TITLE_RE = re.compile(r"