Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions okf/src/reference_agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
__version__ = "0.1.0"

# Default Gemini model used by the agents and the index synthesizer.
# Defined here (a dependency-free leaf module) so it has a single source of
# truth without creating an import cycle between agent.py and bundle.index.
DEFAULT_MODEL = "gemini-flash-latest"
2 changes: 1 addition & 1 deletion okf/src/reference_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from reference_agent.tools.web_tools import fetch_url

DEFAULT_MODEL = "gemini-flash-latest"
from reference_agent import DEFAULT_MODEL


def _load_prompt(filename: str) -> str:
Expand Down
5 changes: 5 additions & 0 deletions okf/src/reference_agent/bundle/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@

import yaml

# Keys this agent requires before it will write a document. Note this is
# stricter than OKF conformance: the SPEC (§4.1, §9) requires only `type`
# and treats title/description/timestamp as recommended. We enforce all
# four as a quality bar for *generated* documents — hand-authored bundles
# with only `type` are still SPEC-conformant and accepted by consumers.
REQUIRED_FRONTMATTER_KEYS = ("type", "title", "description", "timestamp")

_FRONTMATTER_DELIM = "---"
Expand Down
20 changes: 17 additions & 3 deletions okf/src/reference_agent/bundle/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
from pathlib import Path
from typing import Callable

from reference_agent import DEFAULT_MODEL
from reference_agent.bundle.document import OKFDocument
from reference_agent.bundle.synthesizer import synthesize_description

_INDEX_FILE = "index.md"
_FALLBACK_MODEL = "gemini-flash-latest"

# SPEC §11: bundles MAY declare the OKF version they target via
# `okf_version` in the bundle-root index.md frontmatter — the only place
# frontmatter is permitted in an index file.
_OKF_VERSION = "0.1"


def _load_doc(path: Path) -> OKFDocument | None:
Expand All @@ -34,6 +39,12 @@ def _build_index_text(entries: list[tuple[str, str, str, str]]) -> str:
return "\n\n".join(sections) + "\n"


def _with_root_frontmatter(body: str) -> str:
    """Return *body* serialized with frontmatter declaring the OKF version.

    Builds an ``OKFDocument`` whose frontmatter holds the single key
    ``okf_version`` (set to ``_OKF_VERSION``) and whose body is the given
    index text, then returns that document's ``serialize()`` output.
    """
    root_document = OKFDocument(
        frontmatter={"okf_version": _OKF_VERSION},
        body=body,
    )
    return root_document.serialize()


def _directories_to_index(bundle_root: Path) -> list[Path]:
dirs: set[Path] = set()
for md in bundle_root.rglob("*.md"):
Expand All @@ -49,7 +60,7 @@ def _directories_to_index(bundle_root: Path) -> list[Path]:
def regenerate_indexes(
bundle_root: Path,
*,
model: str = _FALLBACK_MODEL,
model: str = DEFAULT_MODEL,
synthesize: Callable[..., str] = synthesize_description,
) -> list[Path]:
bundle_root = Path(bundle_root)
Expand Down Expand Up @@ -86,8 +97,11 @@ def regenerate_indexes(
if not entries:
continue

index_text = _build_index_text(entries)
if directory == bundle_root:
index_text = _with_root_frontmatter(index_text)
index_path = directory / _INDEX_FILE
index_path.write_text(_build_index_text(entries), encoding="utf-8")
index_path.write_text(index_text, encoding="utf-8")
written.append(index_path)

if directory == bundle_root:
Expand Down
2 changes: 1 addition & 1 deletion okf/src/reference_agent/prompts/reference_instruction.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ concept and finishes by calling `write_concept_doc` exactly once.
5. Compose an OKF document and call `write_concept_doc(concept_id, frontmatter,
body)` exactly once. Do not call any tools after that.

## Frontmatter (YAML, required keys)
## Frontmatter (YAML — always include all of these)

- `type`: the concept type, exactly as returned in the concept ref (e.g.
`BigQuery Table`, `BigQuery Dataset`).
Expand Down
2 changes: 1 addition & 1 deletion okf/src/reference_agent/web/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from markdownify import markdownify

_USER_AGENT = "okf-reference-agent/0.1 (+https://github.com/amirhormati/open-knowledge-format)"
_USER_AGENT = "okf-reference-agent/0.1 (+https://github.com/GoogleCloudPlatform/knowledge-catalog)"
_MAX_MARKDOWN_BYTES = 40 * 1024

_TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
Expand Down
25 changes: 25 additions & 0 deletions okf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,31 @@ def test_regenerate_groups_by_type_and_links_relative(tmp_path: Path):
assert "(tables/index.md) - stub: 2 items" in root_index


def test_root_index_declares_okf_version_others_have_no_frontmatter(
    tmp_path: Path,
):
    """Only the bundle-root index carries frontmatter (the OKF version)."""
    bundle = tmp_path / "bundle"
    users_doc_path = bundle / "tables" / "users.md"
    _write_doc(
        users_doc_path,
        "BigQuery Table",
        "users",
        "Per-user dimension.",
    )

    regenerate_indexes(bundle, model="stub", synthesize=_stub_synth)

    # SPEC §11: the bundle-root index.md is the only index that carries
    # frontmatter, and it declares the targeted OKF version.
    root_index_text = (bundle / "index.md").read_text(encoding="utf-8")
    root_index = OKFDocument.parse(root_index_text)
    assert root_index.frontmatter == {"okf_version": "0.1"}

    # Indexes below the root must stay free of frontmatter.
    tables_index_text = (bundle / "tables" / "index.md").read_text(
        encoding="utf-8"
    )
    tables_index = OKFDocument.parse(tables_index_text)
    assert tables_index.frontmatter == {}


def test_regenerate_skips_empty_directories(tmp_path: Path):
root = tmp_path / "bundle"
root.mkdir()
Expand Down