
Commit 00e14bf

Telemetry: include embedding telemetry in director_decision payload; add docs note (embeddings.md)

- Attach embedding timing and fallback info to director telemetry under metrics.embedding
- Add docs/dev/embeddings.md describing enabling, telemetry shape, and follow-ups
1 parent 00efcb7 commit 00e14bf

3 files changed

Lines changed: 67 additions & 15 deletions


.beads/issues.jsonl

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@
{"id":"ge-hch.5.20","title":"Feature-Flagged Release","description":"Release M2 to players behind feature flags with kill-switches.\n\n## Scope\n- Feature flag implementation for AI branching\n- Kill-switch for emergency disable\n- Internal playtester rollout\n- Initial player coherence validation\n- **Player experience change**: AI branching available to wider audience (not just internal testing). Players can opt-in/out via settings. If issues arise, feature can be disabled globally without deployment.\n\n## Success Criteria\n- Feature flag controls AI branch availability\n- Kill-switch disables AI branches within 1 minute\n- Internal playtesters rate coherence ≥4/5\n- No critical bugs in first playtester cohort\n- Players can enable/disable AI branching in settings\n- Players with feature disabled experience unchanged authored story\n- Players with feature enabled experience full AI branching capability\n\n## Dependencies\n- Milestone 7: Validation Test Corpus \u0026 Tuning (ge-hch.5.19)\n\n## Deliverables\n- Feature flag configuration\n- Kill-switch mechanism\n- Playtester feedback collection\n- Initial coherence report","status":"open","priority":1,"issue_type":"epic","assignee":"Build","created_at":"2026-01-16T13:23:52.637358271-08:00","created_by":"rgardler","updated_at":"2026-01-16T13:23:52.637358271-08:00","labels":["milestone","stage:idea"],"dependencies":[{"issue_id":"ge-hch.5.20","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-16T13:23:52.638556813-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.20","depends_on_id":"ge-hch.5.19","type":"blocks","created_at":"2026-01-16T13:24:21.793163865-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.20.1","title":"Backend API Relay for LLM Requests","description":"Implement a backend relay service to proxy LLM API requests, solving CORS limitations and enabling usage telemetry.\n\n## Problem\n\nBrowser-side requests to enterprise AI APIs (Azure OpenAI, Anthropic, etc.) are blocked by CORS policies. Currently, only OpenAI's API supports browser CORS. This limits the AI Writer to OpenAI or requires users to run local CORS proxies for development.\n\n## Solution\n\nImplement a lightweight backend relay that:\n1. Receives requests from the browser frontend\n2. Forwards them to the configured LLM endpoint (Azure OpenAI, OpenAI, etc.)\n3. Returns responses with proper CORS headers\n4. Collects anonymized usage telemetry\n\n## Benefits\n\n- **Universal API support**: Works with any LLM provider regardless of CORS policy\n- **Usage telemetry**: Enables collection of anonymized usage data for analysis\n- **API key security**: Keys can be stored server-side instead of in browser localStorage\n- **Rate limiting**: Can implement server-side rate limiting and quotas\n- **Cost tracking**: Can track API usage costs across users\n\n## Implementation Options\n\n1. **Azure Function** - Serverless, scales automatically, Azure-native\n2. **Cloudflare Worker** - Edge deployment, low latency, simple setup\n3. **Node.js server** - Full control, can be containerized\n\n## Acceptance Criteria\n\n- [ ] Relay accepts POST requests from frontend with LLM request payload\n- [ ] Relay forwards requests to configured LLM endpoint\n- [ ] Relay returns responses with CORS headers for allowed origins\n- [ ] Relay logs anonymized telemetry (request count, latency, model used, error rates)\n- [ ] Frontend can be configured to use relay URL instead of direct API\n- [ ] Documentation for deployment and configuration\n\n## Files/Paths\n\n- `server/` or `functions/` - Backend relay implementation\n- `web/demo/js/llm-adapter.js` - Update to support relay mode\n- `web/demo/js/api-key-manager.js` - Update settings UI for relay configuration\n- `docs/dev/backend-relay.md` - Deployment documentation\n\n## Discovered From\n\nge-hch.5.14 (AI Writer Implementation) - CORS limitation discovered during Azure OpenAI testing","status":"open","priority":2,"issue_type":"feature","created_at":"2026-01-16T15:37:15.759001241-08:00","created_by":"rgardler","updated_at":"2026-01-16T15:37:15.759001241-08:00","labels":["stage:idea"],"dependencies":[{"issue_id":"ge-hch.5.20.1","depends_on_id":"ge-hch.5.20","type":"parent-child","created_at":"2026-01-16T15:37:15.761739299-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.21","title":"Telemetry Analysis \u0026 Tuning","description":"Analyze production telemetry and tune Director/Writer based on player data.\n\n## Scope\n- Build observability dashboards (5 views per telemetry-schema.md)\n- Analyze acceptance rates, latency, policy violations\n- Tune Director risk thresholds and Writer prompts\n- Document tuning decisions for Phase 4 planning\n- **Player experience change**: Data-driven improvements to AI branching. Players experience better-timed branch offers, improved narrative coherence, and content better matched to their play style (based on player preference learning).\n\n## Success Criteria\n- All 5 dashboard views operational\n- Director decision latency P95 \u003c500ms in production\n- Policy violation rate \u003c2%\n- Tuning report with recommendations for Phase 4\n- Player acceptance rate of AI branches increases over tuning period\n- Player coherence ratings improve compared to M8 baseline\n\n## Dependencies\n- Milestone 8: Feature-Flagged Release (ge-hch.5.20)\n\n## Deliverables\n- Observability dashboards\n- Tuning report\n- Updated Director/Writer configuration\n- Phase 4 recommendations document","status":"open","priority":1,"issue_type":"epic","assignee":"Build","created_at":"2026-01-16T13:24:01.316180976-08:00","created_by":"rgardler","updated_at":"2026-01-16T13:24:01.316180976-08:00","labels":["milestone","stage:idea"],"dependencies":[{"issue_id":"ge-hch.5.21","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-16T13:24:01.31712331-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.21","depends_on_id":"ge-hch.5.20","type":"blocks","created_at":"2026-01-16T13:24:21.834830044-08:00","created_by":"rgardler"}]}
-{"id":"ge-hch.5.22","title":"Runtime: integrate embeddings into engine","description":"Integrate embedding-based similarity into the runtime engine so risk scoring and validation can use local semantic embeddings during play.\\n\\n## Context\\nEmbedding service exists in web/demo/js/embedding-service.js and tests cover similarity thresholds. We need to connect it to runtime pipelines so embeddings can be used for scoring (theme, lore, voice) during live branching.\\n\\n## Acceptance Criteria\\n- Runtime code calls embedding service when scoring AI branches (thematic consistency, lore adherence, character voice) with caching to avoid repeated inference in a session.\\n- Clear configuration for enabling/disabling embeddings in runtime (env flag or settings).\\n- Telemetry event records embedding timing and fallback status.\\n- Update docs describing how to enable embeddings in runtime (include any env flags).\\n- Add or update tests for embedding-enabled runtime scoring (unit or integration).\\n\\n## Implementation Notes\\n- Likely touch: src/director/ or src/runtime/ scoring modules, config handling, and telemetry emitter.\\n- Ensure fallback behavior when embeddings unavailable (e.g., null -\u003e neutral score).\\n\\n## Files/Paths to touch\\n- src/director/** or src/runtime/**\\n- src/telemetry/**\\n- web/demo/js/embedding-service.js (if API adjustments needed)\\n- docs/dev/** or README\\n","status":"in_progress","priority":1,"issue_type":"feature","assignee":"@OpenCode","owner":"ross@gardler.org","created_at":"2026-01-21T11:06:00.862351668-08:00","created_by":"Ross Gardler","updated_at":"2026-01-21T16:30:28.148830957-08:00","labels":["stage:idea","stage:in_progress"],"dependencies":[{"issue_id":"ge-hch.5.22","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-21T11:06:00.866061912-08:00","created_by":"Ross Gardler"}]}
+{"id":"ge-hch.5.22","title":"Runtime: integrate embeddings into engine","description":"Integrate embedding-based similarity into the runtime engine so risk scoring and validation can use local semantic embeddings during play.\\n\\n## Context\\nEmbedding service exists in web/demo/js/embedding-service.js and tests cover similarity thresholds. We need to connect it to runtime pipelines so embeddings can be used for scoring (theme, lore, voice) during live branching.\\n\\n## Acceptance Criteria\\n- Runtime code calls embedding service when scoring AI branches (thematic consistency, lore adherence, character voice) with caching to avoid repeated inference in a session.\\n- Clear configuration for enabling/disabling embeddings in runtime (env flag or settings).\\n- Telemetry event records embedding timing and fallback status.\\n- Update docs describing how to enable embeddings in runtime (include any env flags).\\n- Add or update tests for embedding-enabled runtime scoring (unit or integration).\\n\\n## Implementation Notes\\n- Likely touch: src/director/ or src/runtime/ scoring modules, config handling, and telemetry emitter.\\n- Ensure fallback behavior when embeddings unavailable (e.g., null -\u003e neutral score).\\n\\n## Files/Paths to touch\\n- src/director/** or src/runtime/**\\n- src/telemetry/**\\n- web/demo/js/embedding-service.js (if API adjustments needed)\\n- docs/dev/** or README\\n","status":"in_progress","priority":1,"issue_type":"feature","assignee":"@OpenCode","owner":"ross@gardler.org","created_at":"2026-01-21T11:06:00.862351668-08:00","created_by":"Ross Gardler","updated_at":"2026-01-21T16:30:28.148830957-08:00","labels":["stage:idea","stage:in_progress"],"dependencies":[{"issue_id":"ge-hch.5.22","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-21T11:06:00.866061912-08:00","created_by":"Ross Gardler"}],"comments":[{"id":267,"issue_id":"ge-hch.5.22","author":"@OpenCode","text":"Implemented embedding integration in Director runtime: opt-in flag, compute embedding metrics in evaluate() and pass to computeRiskScore; safe fallbacks and tests passing locally. PR: https://github.com/TheWizardsCode/GEngine/pull/186","created_at":"2026-01-22T00:33:10Z"}]}
{"id":"ge-hch.5.3","title":"Secure: LLM API key storage","description":"Track storing the OpenAI-compatible API key in CI secrets and local dev guidance.\\n\\n## Acceptance Criteria\\n- CI secret name documented (OPENAI_API_KEY).\\n- docs/security/llm_api_key.md created with guidance for local auth and GitHub Actions.\\n","status":"closed","priority":2,"issue_type":"task","assignee":"Build","created_at":"2026-01-07T19:44:15.042691181-08:00","created_by":"rgardler","updated_at":"2026-01-16T02:09:46.342313309-08:00","closed_at":"2026-01-16T02:09:46.342313309-08:00","close_reason":"Auto-close: cleanup per status-skill","dependencies":[{"issue_id":"ge-hch.5.3","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-07T19:44:15.044559498-08:00","created_by":"rgardler"}],"comments":[{"id":164,"issue_id":"ge-hch.5.3","author":"rgardler","text":"Auto-closing per status-skill recommendation: no linked branches/PRs and not an epic. If this is incorrect, please reopen or add a comment.","created_at":"2026-01-16T10:09:46Z"}]}
{"id":"ge-hch.5.4","title":"Phase 0: Branch proposal schema definition","description":"Define and document the branch proposal JSON schema with complete examples.\n\n## Acceptance Criteria\n- JSON schema file created with full specification (metadata, story context, branch content, provenance).\n- At least 10 example proposal JSONs demonstrating different narrative contexts.\n- Schema includes field descriptions, type constraints, and validation rules.\n- Schema validated against example proposals using a JSON schema validator.\n\n## Files to create/modify\n- history/m2-schemas/branch-proposal.json (schema definition)\n- history/m2-schemas/examples/ (example proposals)\n- docs/m2-design/schema-docs.md (documentation)\n","status":"closed","priority":1,"issue_type":"task","assignee":"@rgardler","created_at":"2026-01-16T10:20:35.533556391-08:00","created_by":"rgardler","updated_at":"2026-01-16T10:29:01.376570289-08:00","closed_at":"2026-01-16T10:29:01.376570289-08:00","close_reason":"Completed: Branch proposal schema with 10 examples and comprehensive documentation","dependencies":[{"issue_id":"ge-hch.5.4","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-16T10:20:35.536650238-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.5","title":"Phase 0: Validation pipeline design","description":"Design and prototype the validation pipeline with policy checks and sanitization transforms.\n\n## Acceptance Criteria\n- Policy ruleset defined with clear categories (profanity, theme consistency, length limits, narrative red lines).\n- Sanitization transforms documented (HTML stripping, whitespace normalization, encoding validation).\n- Validation report schema defined with pass/fail status and rule-level diagnostics.\n- Test corpus created: ~20 example proposals covering happy paths and edge cases.\n- Pipeline validates deterministically: same input + ruleset → same result.\n\n## Files to create/modify\n- history/m2-design/policy-ruleset.md (policy rules with rationale)\n- history/m2-design/sanitization-transforms.md (sanitization logic)\n- history/m2-schemas/validation-report.json (schema)\n- history/m2-design/test-cases.md (test corpus)\n","status":"closed","priority":1,"issue_type":"task","assignee":"@rgardler","created_at":"2026-01-16T10:20:37.499166616-08:00","created_by":"rgardler","updated_at":"2026-01-16T10:29:01.419511433-08:00","closed_at":"2026-01-16T10:29:01.419511433-08:00","close_reason":"Completed: Validation pipeline design with policy ruleset and sanitization transforms","dependencies":[{"issue_id":"ge-hch.5.5","depends_on_id":"ge-hch.5","type":"parent-child","created_at":"2026-01-16T10:20:37.500083559-08:00","created_by":"rgardler"}]}

docs/dev/embeddings.md

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
Embedding Integration (runtime)

Overview
--------

The Director can optionally use local semantic embeddings to improve scoring for thematic consistency, lore adherence, and character voice. This feature is opt-in and disabled by default.
Enabling
--------

- Set `enableEmbeddings: true` in `.gengine/config.yaml` under `directorConfig`, or pass `{ enableEmbeddings: true }` via the `evaluate()` `config` argument.
- For Node integration tests you may set environment flags used by the embedding service: `EMBED_NODE=1` enables the Node fallback.
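The precedence between the config-file flag and the per-call `config` argument is not spelled out in this commit; a minimal sketch, assuming the per-call value wins and that everything defaults to disabled (`resolveEmbeddingsEnabled` is a hypothetical helper, not part of the codebase):

```javascript
// Hypothetical flag-resolution helper, for illustration only:
// the per-call config wins over directorConfig, default is off.
function resolveEmbeddingsEnabled(directorConfig = {}, callConfig = {}) {
  if (typeof callConfig.enableEmbeddings === 'boolean') return callConfig.enableEmbeddings;
  return directorConfig.enableEmbeddings === true;
}

console.log(resolveEmbeddingsEnabled({}, {}));                         // false (default off)
console.log(resolveEmbeddingsEnabled({ enableEmbeddings: true }, {})); // true (config-file opt-in)
console.log(resolveEmbeddingsEnabled({ enableEmbeddings: true }, { enableEmbeddings: false })); // false
```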
Telemetry
---------

When embeddings are enabled, Director emits embedding telemetry inside the `director_decision` event under `metrics.embedding` with fields:

- `used` (boolean) - whether embeddings were successfully computed
- `latencyMs` (number) - inference time in milliseconds
- `fallback` (boolean) - true when embeddings were not used and placeholders were applied
- `metrics` (optional object) - similarity metrics (0..1) for `thematic`, `lore`, and `voice` when available
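For concreteness, a sketch of the payload shape this describes; only the field names come from the list above, all values are illustrative:

```javascript
// Illustrative metrics.embedding object as it would appear inside a
// director_decision event; numbers are made up.
const exampleEmbeddingTelemetry = {
  used: true,      // embedding inference succeeded
  latencyMs: 42,   // inference time in milliseconds
  fallback: false, // no placeholder risks were applied
  metrics: {       // similarity scores in 0..1
    thematic: 0.81,
    lore: 0.74,
    voice: 0.69,
  },
};
console.log(JSON.stringify(exampleEmbeddingTelemetry));
```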
Implementation notes
--------------------

- `evaluate()` computes embeddings asynchronously (using `web/demo/js/embedding-service.js` when available) and derives similarity metrics when story-level embeddings are provided on `storyContext` as `themeEmbedding`, `loreEmbedding`, `voiceEmbedding` arrays.
- `computeRiskScore()` remains synchronous and reads precomputed `config.embeddingMetrics` (if present) to convert similarities into placeholder risks. This keeps the core scoring deterministic and testable.
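The exact similarity-to-risk conversion inside `computeRiskScore()` is not part of this commit; one plausible sketch (higher similarity meaning lower risk, with the neutral fallback the issue calls for when no metric is available) is:

```javascript
// Hypothetical mapping from a 0..1 similarity to a 0..1 placeholder risk.
// The real computeRiskScore() logic is not shown in this diff.
function similarityToRisk(similarity) {
  if (typeof similarity !== 'number' || Number.isNaN(similarity)) {
    return 0.5; // fallback: neutral risk when embeddings were unavailable
  }
  const clamped = Math.min(1, Math.max(0, similarity));
  return 1 - clamped; // perfect similarity -> zero risk
}

console.log(similarityToRisk(1));    // 0 (perfectly on-theme)
console.log(similarityToRisk(null)); // 0.5 (neutral fallback)
```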
Follow-ups
----------

1) Precompute story-level embeddings at load-time and attach them to story context. (bead created)
2) Optionally emit a dedicated `embedding_inference` telemetry event in addition to including embedding metadata in director telemetry. (bead created)

web/demo/js/director.js

Lines changed: 38 additions & 14 deletions
@@ -407,22 +407,36 @@ async function evaluate(proposal, storyContext = {}, config = {}) {
 if (embeddingsEnabled) {
   const EmbeddingService = (typeof window !== 'undefined' && window.EmbeddingService) ? window.EmbeddingService : (typeof require === 'function' ? require('../../../web/demo/js/embedding-service') : null);
   if (EmbeddingService && typeof EmbeddingService.embed === 'function' && typeof EmbeddingService.similarity === 'function') {
-    try {
+    try {
       const text = (proposal.content && proposal.content.text) || '';
-      // We compute embedding and compare to any provided context embeddings
-      // on storyContext (theme/lore/voice). These context embeddings are
-      // optional and must be arrays of numbers. Failures produce no-op.
-      const emb = await (EmbeddingService.embed ? EmbeddingService.embed(text) : null);
-      if (emb && Array.isArray(emb)) {
-        const emMetrics = {};
-        const themeEmb = storyContext && storyContext.themeEmbedding;
-        const loreEmb = storyContext && storyContext.loreEmbedding;
-        const voiceEmb = storyContext && storyContext.voiceEmbedding;
-        if (themeEmb && Array.isArray(themeEmb)) emMetrics.thematic = EmbeddingService.similarity(emb, themeEmb);
-        if (loreEmb && Array.isArray(loreEmb)) emMetrics.lore = EmbeddingService.similarity(emb, loreEmb);
-        if (voiceEmb && Array.isArray(voiceEmb)) emMetrics.voice = EmbeddingService.similarity(emb, voiceEmb);
-        finalConfig = Object.assign({}, config, { embeddingMetrics: emMetrics });
+      // Capture embedding timing and fallback status for telemetry
+      let emb = null;
+      let emMetrics = null;
+      let embeddingTelemetry = { used: false, latencyMs: 0, fallback: true };
+      try {
+        const embStart = perf.now();
+        emb = await (EmbeddingService.embed ? EmbeddingService.embed(text) : null);
+        embeddingTelemetry.latencyMs = Math.max(0, Math.round(perf.now() - embStart));
+        embeddingTelemetry.used = Boolean(emb && Array.isArray(emb));
+        embeddingTelemetry.fallback = !embeddingTelemetry.used;
+
+        if (emb && Array.isArray(emb)) {
+          emMetrics = {};
+          const themeEmb = storyContext && storyContext.themeEmbedding;
+          const loreEmb = storyContext && storyContext.loreEmbedding;
+          const voiceEmb = storyContext && storyContext.voiceEmbedding;
+          if (themeEmb && Array.isArray(themeEmb)) emMetrics.thematic = EmbeddingService.similarity(emb, themeEmb);
+          if (loreEmb && Array.isArray(loreEmb)) emMetrics.lore = EmbeddingService.similarity(emb, loreEmb);
+          if (voiceEmb && Array.isArray(voiceEmb)) emMetrics.voice = EmbeddingService.similarity(emb, voiceEmb);
+          finalConfig = Object.assign({}, config, { embeddingMetrics: emMetrics });
+        }
+      } catch (innerErr) {
+        // treat as fallback - keep embeddingTelemetry indicating fallback
       }
+      // expose embedding telemetry/metrics to outer scope via finalConfig extras
+      if (!finalConfig) finalConfig = config;
+      finalConfig._embeddingTelemetry = embeddingTelemetry;
+      finalConfig._embeddingMetrics = emMetrics;
     } catch (e) {
       // ignore embedding failures and continue with placeholder behavior
     }
@@ -452,6 +466,16 @@ async function evaluate(proposal, storyContext = {}, config = {}) {
     totalMs,
     metrics
   };
+  // If embedding telemetry was captured in finalConfig, include it in the
+  // director telemetry payload under `metrics.embedding` so ingestion can
+  // observe embedding inference timing and fallback status.
+  try {
+    const emt = finalConfig && finalConfig._embeddingTelemetry;
+    const emm = finalConfig && finalConfig._embeddingMetrics;
+    if (emt || emm) {
+      result.metrics = Object.assign({}, result.metrics, { embedding: Object.assign({}, emt || {}, { metrics: emm || null }) });
+    }
+  } catch (e) {}

   emitDecisionTelemetry(result);
   return result;

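The diff calls `EmbeddingService.similarity(emb, themeEmb)` without showing its implementation; a self-contained cosine-style sketch (an assumption, not the actual `web/demo/js/embedding-service.js` code) that would produce the 0..1 metrics described in the docs:

```javascript
// Cosine similarity between two equal-length vectors, mapped to 0..1.
// Sketch only; the real EmbeddingService.similarity may differ.
function cosineSimilarity01(a, b) {
  if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length || a.length === 0) return null;
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  if (na === 0 || nb === 0) return null; // zero vector: no meaningful direction
  const cos = dot / (Math.sqrt(na) * Math.sqrt(nb)); // -1..1
  return (cos + 1) / 2; // map to the 0..1 range the telemetry doc describes
}

console.log(cosineSimilarity01([1, 0], [1, 0])); // 1
console.log(cosineSimilarity01([1, 0], [0, 1])); // 0.5
```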