Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions manifests.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
{
"id": "deep-research",
"name": "deep_research",
"description": "Protocol-first, gated, multi-agent literature investigation. Two human-in-the-loop gates bracket a four-role subagent crew (Scout / Skeptic / Methodologist / Synthesizer); every claim a synthesis ships must be backed by a row in research_evidence with a verbatim quote_span — if a claim can't produce its quote, the claim gets cut. v0.1.0 shipped the protocol (SKILL.md), DB schema (PostgreSQL + SQLite), five agent role prompts, install-manifest, and a worked example (cholesterol primary prevention). v0.2.0 adds the Python reference implementation: a stdlib-urllib scholar adapter (OpenAlex / Semantic Scholar / PubMed / arXiv / Europe PMC / Crossref / Unpaywall) and a python-docx + matplotlib synthesis builder with pluggable upload. Agent-runtime-agnostic; core stays stdlib-only, the docx builder is gated behind the optional [viz] extra.",
"description": "deep_research is an agent-runtime-agnostic workflow that turns an empirical research question into a citation-grade synthesis. Two human-in-the-loop gates (protocol pre-registration, Pass-2 spend) bracket a four-role subagent crew (Scout / Skeptic / Methodologist / Synthesizer). Every claim a synthesis ships must be backed by a row in the research_evidence table with a verbatim quote_span; claims without their quote get cut. v0.2.0 adds the Python reference implementation: a stdlib-urllib scholar adapter (OpenAlex / Semantic Scholar / PubMed / arXiv / Europe PMC / Crossref / Unpaywall) and a python-docx + matplotlib synthesis builder with pluggable upload. Core stays stdlib-only; the docx builder is gated behind the optional `[viz]` extra.",
"capabilities": [
"research",
"literature-review",
Expand All @@ -97,7 +97,8 @@
"manifest_url": "https://raw.githubusercontent.com/drknowhow/deep-research/main/manifests/deep-research.v0.4.json",
"source": "https://github.com/drknowhow/deep-research/blob/main/manifests/deep-research.v0.4.json",
"manifest_version": "0.4",
"status": "example"
"status": "example",
"auto_refresh_from_upstream": true
},
{
"id": "vi-federation",
Expand All @@ -118,7 +119,7 @@
{
"id": "yep-memory",
"name": "Yep Memory",
"description": "Persistent memory primitives for personal agents — facts, reflections, and episodes with semantic recall across conversations.",
"description": "Yep's memory subsystem exposes three orthogonal stores: facts (key-value, upsert), reflections (free-form learned generalizations), and episodes (event log with semantic search). Used internally by Yep and surfaced here as the canonical example for any personal-agent stack that wants long-running cross-session memory. Backed by Supabase + pgvector; reflection writes are deliberate and sparse, not autopilot. Published as the first dogfood entry on drknowhow/Yep to validate the federation pipeline end-to-end against toolspace.yepgent.com.",
"capabilities": [
"memory",
"personal-agent",
Expand All @@ -134,7 +135,8 @@
"publisher_kind": "https",
"publisher_id": "yepgent.com",
"trust_tier": "verified"
}
},
"auto_refresh_from_upstream": true
Comment on lines 135 to +139
},
{
"id": "muninn-blog-publish",
Expand Down
65 changes: 60 additions & 5 deletions scripts/sync_from_publishers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
the original gmail.* examples) are preserved verbatim. Federation-
sourced entries are fully derived from each fetch.

Opt-in: an example entry can carry ``auto_refresh_from_upstream: true``
to have its ``description`` (from ``tool.description``) and its
``capabilities`` (from ``tool.tags``, when present) refreshed from the
manifest at its ``manifest_url`` on every sync. Used for example entries
that track a real external repo (e.g. deep-research, yep-memory) so they
don't drift when upstream bumps tool.version or edits the description.
Spec-mirrored examples (gmail-*, muninn-*) leave the flag off and stay
verbatim.

Usage:
python scripts/sync_from_publishers.py [--check] [--allow-network]

Expand Down Expand Up @@ -75,6 +83,47 @@ def _fetch_with_fixtures(url: str) -> bytes:
return fetch_url(url)


def _refresh_example_from_upstream(entry: dict) -> tuple[dict, str | None]:
"""Refresh an example entry's description/capabilities from upstream.

Fetches ``entry['manifest_url']`` and overwrites ``description`` from
``tool.description`` and ``capabilities`` from ``tool.tags`` (when
present). Other fields are left intact. On any fetch/parse failure
the entry is returned unchanged and a warning string surfaces to the
caller — failing soft so a flaky upstream doesn't break the whole
sync.
"""
url = entry.get("manifest_url")
if not url:
return entry, f"{entry.get('id')}: auto_refresh_from_upstream set but no manifest_url"

try:
raw = _fetch_with_fixtures(url)
except urllib.error.HTTPError as e:
return entry, f"{entry.get('id')}: HTTP {e.code} fetching {url}; description unchanged"
except urllib.error.URLError as e:
return entry, f"{entry.get('id')}: URL error fetching {url}: {e.reason}; description unchanged"
except (TimeoutError, ValueError, OSError, FileNotFoundError) as e:
return entry, f"{entry.get('id')}: fetch error {e}; description unchanged"

try:
doc = json.loads(raw)
except (ValueError, TypeError) as e:
return entry, f"{entry.get('id')}: invalid JSON at {url}: {e}; description unchanged"
Comment on lines +109 to +112

tool = doc.get("tool") or {}
new_desc = tool.get("description")
if not isinstance(new_desc, str) or not new_desc.strip():
return entry, f"{entry.get('id')}: upstream tool.description missing/empty at {url}; description unchanged"

refreshed = dict(entry)
refreshed["description"] = new_desc
upstream_tags = tool.get("tags")
if isinstance(upstream_tags, list) and upstream_tags:
refreshed["capabilities"] = list(upstream_tags)
Comment on lines +121 to +123
return refreshed, None


def _fetch_publisher_index(publisher: dict) -> tuple[dict | None, str | None]:
"""Return (index_doc, error). Either side is None on the success path."""
kind = publisher.get("kind")
Expand Down Expand Up @@ -254,11 +303,17 @@ def build_synced_index() -> tuple[dict, list[str]]:
# Preserve non-federation entries (the original Yep-curated examples)
# and drop any pre-existing federation entries — they are fully re-
# derived from this sync run.
preserved_entries = [
e
for e in current.get("manifests", [])
if not _is_federated_entry(e) and e.get("id") not in federated_ids
]
preserved_entries = []
for e in current.get("manifests", []):
if _is_federated_entry(e) or e.get("id") in federated_ids:
continue
if e.get("auto_refresh_from_upstream"):
refreshed, refresh_warning = _refresh_example_from_upstream(e)
if refresh_warning:
warnings.append(refresh_warning)
preserved_entries.append(refreshed)
else:
preserved_entries.append(e)

new_manifests = preserved_entries + federated_entries

Expand Down
17 changes: 17 additions & 0 deletions tests/fixtures/https_example.com__refreshable.v0.4.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"manifest_version": "0.4",
"tool": {
"id": "refreshable-example",
"name": "refreshable",
"version": "0.2.0",
"description": "Upstream description that should replace the registry one.",
"tags": ["alpha", "beta", "fresh"]
},
"install": [
{
"from": "git",
"from_url": "https://github.com/example/refreshable",
"to_kind": "self_hosted"
}
]
}
86 changes: 86 additions & 0 deletions tests/test_federation_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,89 @@ def _patched(url):
assert any(
"muninn-bad-enum" in w and "manifest invalid" in w for w in warnings
), warnings


# ---- auto_refresh_from_upstream for example entries ----------------------


def _seed_with_example(site, example_entry):
    """Overwrite ``site / "manifests.json"`` with a seed index whose only
    manifest is ``example_entry``."""
    index_doc = dict(
        version="1",
        schema_url="https://toolspace.yepgent.com/schemas/install-manifest-v0.4.json",
        versions=[],
        generated_at="2026-01-01",
        manifests=[example_entry],
    )
    serialized = json.dumps(index_doc, indent=2)
    target = site / "manifests.json"
    target.write_text(serialized, encoding="utf-8")


def test_auto_refresh_replaces_description_and_tags(sandbox):
    """With ``auto_refresh_from_upstream: true`` set, a sync replaces the
    entry's description and capabilities with the upstream manifest's
    ``tool.description`` and ``tool.tags``."""
    target_id = "refreshable-example"
    upstream_url = "https://example.com/refreshable.v0.4.json"
    stale_entry = dict(
        id=target_id,
        name="refreshable",
        description="Stale description that should be replaced.",
        capabilities=["stale-tag"],
        manifest_url=upstream_url,
        source=upstream_url,
        manifest_version="0.4",
        status="example",
        auto_refresh_from_upstream=True,
    )
    _seed_with_example(sandbox, stale_entry)

    synced_doc, _ignored = sync_from_publishers.build_synced_index()
    result = next(filter(lambda m: m["id"] == target_id, synced_doc["manifests"]))

    assert result["description"] == "Upstream description that should replace the registry one."
    assert result["capabilities"] == ["alpha", "beta", "fresh"]
    # The opt-in flag itself must not be dropped by the refresh.
    assert result["auto_refresh_from_upstream"] is True


def test_auto_refresh_absent_flag_preserves_verbatim(sandbox):
    """An example entry that does not opt in is carried over untouched,
    even though its manifest_url points at a fetchable upstream."""
    target_id = "refreshable-example"
    upstream_url = "https://example.com/refreshable.v0.4.json"
    curated_description = "Hand-curated description must not be overwritten."
    # Deliberately omits auto_refresh_from_upstream.
    curated_entry = dict(
        id=target_id,
        name="refreshable",
        description=curated_description,
        capabilities=["curated"],
        manifest_url=upstream_url,
        source=upstream_url,
        manifest_version="0.4",
        status="example",
    )
    _seed_with_example(sandbox, curated_entry)

    synced_doc, _ignored = sync_from_publishers.build_synced_index()
    result = next(filter(lambda m: m["id"] == target_id, synced_doc["manifests"]))

    assert result["description"] == curated_description
    assert result["capabilities"] == ["curated"]


def test_auto_refresh_fails_soft_on_missing_upstream(sandbox):
    """If the upstream fetch fails (there is no fixture for the URL), the
    entry survives unchanged and a warning is reported — a flaky upstream
    MUST NOT break the rest of the sync."""
    target_id = "missing-upstream"
    dead_url = "https://example.com/does-not-exist.v0.4.json"
    original_description = "Original description, must survive a failed refresh."
    opted_in_entry = dict(
        id=target_id,
        name="missing",
        description=original_description,
        capabilities=["original"],
        manifest_url=dead_url,
        source=dead_url,
        manifest_version="0.4",
        status="example",
        auto_refresh_from_upstream=True,
    )
    _seed_with_example(sandbox, opted_in_entry)

    synced_doc, warnings = sync_from_publishers.build_synced_index()
    result = next(filter(lambda m: m["id"] == target_id, synced_doc["manifests"]))

    assert result["description"] == original_description
    assert result["capabilities"] == ["original"]
    assert any("missing-upstream" in w and "description unchanged" in w for w in warnings), warnings
Loading