From d0be9663c8453b7e091cb6882615c40223cde96d Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:26:43 -0600
Subject: [PATCH 01/11] docs: add design spec for manual A365 span
 instrumentation guide

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 ...manual-a365-span-instrumentation-design.md | 237 ++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md

diff --git a/docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md b/docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md
new file mode 100644
index 00000000..a621cb2a
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md
@@ -0,0 +1,237 @@
+# Design: Manual Agent 365 Span Instrumentation (without the SDK)
+
+**Date:** 2026-05-19
+**Status:** Approved
+**Branch:** `docs/manual-a365-span-instrumentation`
+
+## Problem Statement
+
+Teams with existing OpenTelemetry-instrumented Python applications want their spans to appear in the Agent 365 portal without taking a dependency on any `microsoft-agents-a365-*` package. They need a documented attribute contract and export protocol so they can manually set the right span attributes and POST to the A365 ingestion endpoint using only `opentelemetry-sdk` and `requests`.
+
+## Audience
+
+Python developers who:
+- Already have OpenTelemetry configured (any exporter)
+- Want A365 portal compatibility without importing the A365 SDK
+- Need to understand the exact attribute contract the backend expects
+
+## Deliverable
+
+A single documentation file: `docs/manual-a365-span-instrumentation.md`
+
+## Document Structure
+
+| Section | Content |
+|---------|---------|
+| When to use this guide | Audience, prerequisites, when to use the SDK instead |
+| Attribute contract | One tiered table (required/recommended/optional) for each of the 3 span types |
+| Resource attributes | What to set on the TracerProvider resource |
+| SDK-identifying attributes | Optional telemetry.sdk.* attrs for identification |
+| Complete examples | 3 runnable Python snippets (minimal → full → export) |
+| Exporting to Agent 365 | Endpoint URL, auth, payload format, size limits, retry |
+| End-to-end example | Full agent loop with proper span hierarchy + export |
+| Validation & troubleshooting | Verify spans arrive; common rejection reasons |
+
+## Attribute Contract
+
+### Span Type: `invoke_agent`
+
+The top-level span representing one user turn / agent invocation.
+
+| Tier | Attribute | Expected Value |
+|------|-----------|----------------|
+| **Required** | `gen_ai.operation.name` | `"invoke_agent"` |
+| **Required** | `microsoft.tenant.id` | Tenant GUID |
+| **Required** | `gen_ai.agent.id` | Agent GUID |
+| Recommended | `gen_ai.agent.name` | Human-readable agent name |
+| Recommended | `microsoft.session.id` | Session identifier |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier |
+| Recommended | `microsoft.a365.agent.blueprint.id` | Blueprint identifier |
+| Recommended | `microsoft.a365.agent.platform.id` | Platform identifier |
+| Recommended | `user.id` | End-user identifier |
+| Recommended | `server.address` | Server hostname |
+| Optional | `gen_ai.agent.description` | Agent description |
+| Optional | `gen_ai.agent.version` | Agent version string |
+| Optional | `microsoft.agent.user.id` | Agent's user identity |
+| Optional | `microsoft.agent.user.email` | Agent's user email |
+| Optional | `user.email` | End-user email |
+| Optional | `user.name` | End-user display name |
+| Optional | `client.address` | Client IP/hostname |
+| Optional | `microsoft.channel.name` | Channel name (Teams, Webchat, etc.) |
+| Optional | `microsoft.channel.link` | Channel link/URL |
+| Optional | `gen_ai.input.messages` | JSON-serialized input messages |
+| Optional | `microsoft.a365.caller.agent.name` | Calling agent name (agent-to-agent) |
+| Optional | `microsoft.a365.caller.agent.id` | Calling agent ID (agent-to-agent) |
+
+### Span Type: `inference` (LLM call)
+
+Child of `invoke_agent`. One per LLM inference call.
+
+| Tier | Attribute | Expected Value |
+|------|-----------|----------------|
+| **Required** | `gen_ai.operation.name` | `"Chat"` (or `"TextCompletion"` / `"GenerateContent"`) |
+| **Required** | `microsoft.tenant.id` | Tenant GUID |
+| **Required** | `gen_ai.agent.id` | Agent GUID |
+| **Required** | `gen_ai.request.model` | Model name (e.g. `"gpt-4o"`) |
+| Recommended | `gen_ai.usage.input_tokens` | Integer token count |
+| Recommended | `gen_ai.usage.output_tokens` | Integer token count |
+| Recommended | `gen_ai.response.finish_reasons` | JSON array of finish reasons |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier |
+| Recommended | `gen_ai.provider.name` | Provider (e.g. `"openai"`, `"azure"`) |
+| Optional | `gen_ai.input.messages` | JSON-serialized input messages |
+| Optional | `gen_ai.output.messages` | JSON-serialized output messages |
+| Optional | `server.address` | LLM endpoint hostname |
+| Optional | `server.port` | LLM endpoint port (omit if 443) |
+| Optional | `microsoft.a365.agent.thought.process` | Agent reasoning trace |
+
+### Span Type: `execute_tool`
+
+Child of `invoke_agent`. One per tool invocation.
+
+| Tier | Attribute | Expected Value |
+|------|-----------|----------------|
+| **Required** | `gen_ai.operation.name` | `"execute_tool"` |
+| **Required** | `microsoft.tenant.id` | Tenant GUID |
+| **Required** | `gen_ai.agent.id` | Agent GUID |
+| **Required** | `gen_ai.tool.name` | Tool function name |
+| Recommended | `gen_ai.tool.call.id` | Tool call ID from LLM response |
+| Recommended | `gen_ai.tool.call.arguments` | JSON-serialized arguments |
+| Recommended | `gen_ai.tool.call.result` | JSON-serialized result |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier |
+| Optional | `gen_ai.tool.type` | Tool type (e.g. `"function"`) |
+| Optional | `gen_ai.tool.description` | Tool description |
+| Optional | `server.address` | Server hostname |
+
+### Resource Attributes (on TracerProvider)
+
+| Tier | Attribute | Expected Value |
+|------|-----------|----------------|
+| **Required** | `service.name` | Your service/agent name |
+| Recommended | `service.namespace` | Your service namespace |
+
+### SDK-Identifying Attributes (on all spans)
+
+These are optional but help the backend identify the telemetry source:
+
+| Attribute | Value |
+|-----------|-------|
+| `telemetry.sdk.name` | `"A365ObservabilitySDK"` (or your own identifier) |
+| `telemetry.sdk.language` | `"python"` |
+| `telemetry.sdk.version` | Your version string |
+
+## Export Protocol
+
+### Endpoint
+
+```
+POST https://agent365.svc.cloud.microsoft/observability/tenants/{tenantId}/otlp/agents/{agentId}/traces?api-version=1
+```
+
+Where `{tenantId}` and `{agentId}` come from the span attributes `microsoft.tenant.id` and `gen_ai.agent.id`.
+
+### Authentication
+
+```
+Authorization: Bearer <token>
+Content-Type: application/json
+```
+
+Token is obtained from a resolver function with signature: `(agent_id: str, tenant_id: str) -> str`
+
+The guide will document the interface but not prescribe a specific token acquisition method (MSAL, managed identity, etc.) since that depends on the deployment environment.
+
+### Payload Format
+
+Custom OTLP-like JSON (neither standard OTLP protobuf nor standard OTLP/HTTP JSON):
+
+```json
+{
+  "resourceSpans": [
+    {
+      "resource": {
+        "attributes": { "service.name": "my-agent", "service.namespace": "my-ns" }
+      },
+      "scopeSpans": [
+        {
+          "scope": { "name": "my-instrumentor", "version": "1.0.0" },
+          "spans": [
+            {
+              "traceId": "0af7651916cd43dd8448eb211c80319c",
+              "spanId": "b7ad6b7169203331",
+              "parentSpanId": null,
+              "name": "invoke_agent my-agent",
+              "kind": "INTERNAL",
+              "startTimeUnixNano": 1716000000000000000,
+              "endTimeUnixNano": 1716000001000000000,
+              "attributes": { "gen_ai.operation.name": "invoke_agent", "..." : "..." },
+              "events": null,
+              "links": null,
+              "status": { "code": "OK", "message": "" }
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+
+### Constraints
+
+| Constraint | Value | Behavior on violation |
+|------------|-------|----------------------|
+| Max payload size | ~900,000 bytes | Split into multiple POSTs (chunks) |
+| Max individual span size | 250,000 bytes | Largest attributes truncated to `"TRUNCATED"` |
+| Required span filter | `gen_ai.operation.name` ∈ `{invoke_agent, execute_tool, chat, Chat, TextCompletion, GenerateContent}` | Spans with other values are silently dropped |
+| Required identity | Both `microsoft.tenant.id` and `gen_ai.agent.id` present and non-empty | Spans missing either attribute are silently dropped |
+| Retryable HTTP codes | 408, 429, 5xx | Retry with exponential backoff (respect `Retry-After` for 429) |
+| Non-retryable HTTP codes | Other 4xx | Fail immediately |
+
+### Span Name Convention
+
+| Span type | Span name format |
+|-----------|-----------------|
+| invoke_agent | `"invoke_agent"` or `"invoke_agent <agent_name>"` |
+| inference | `"<operation> <model>"` (e.g. `"Chat gpt-4o"`) |
+| execute_tool | `"execute_tool <tool_name>"` |
+
+## Examples Plan
+
+### Example 1: Minimal invoke_agent span
+
+Creates a single root span with only required attributes, exports to `ConsoleSpanExporter` for verification.
+
+### Example 2: Full agent turn with hierarchy
+
+Creates `invoke_agent` → `inference` + `execute_tool` children with all recommended attributes. Still uses console export.
+
+### Example 3: DIY export to Agent 365 backend
+
+Implements a minimal custom `SpanExporter` that builds the JSON envelope and POSTs to the A365 endpoint with Bearer auth. Shows the complete flow from span creation to backend ingestion without any A365 package.
+
+### Example 4: End-to-end agent loop
+
+Combines examples 2 + 3 into a realistic agent loop: receive user message → invoke_agent span → call OpenAI (inference span) → execute tool (execute_tool span) → export to A365.
+
+## Validation & Troubleshooting
+
+The guide will include:
+- How to verify spans appear in the A365 portal after export
+- Common HTTP error codes and what they mean
+- Checklist: "My spans aren't showing up" (missing required attrs, wrong operation name, auth failure, payload too large)
+
+## Key Design Decisions
+
+1. **Zero A365 package dependency** — only `opentelemetry-sdk` and `requests` required
+2. **Tiered attribute contract** — Required (backend drops without) / Recommended (enables features) / Optional (enrichment)
+3. **Document the allow-list explicitly** — spans whose `gen_ai.operation.name` is not on the allow-list are silently dropped
+4. **Token resolver interface documented, not implementation** — users bring their own auth
+5. **Custom exporter example, not OTLPSpanExporter** — A365 backend uses a custom JSON format, not standard OTLP
+6. **Versioning caveat** — the guide will note that the payload format is a contract that may evolve; the SDK handles this automatically and is the recommended path for production
+
+## Out of Scope
+
+- Token acquisition implementation (MSAL, managed identity, etc.)
+- Multi-language support (future work)
+- Baggage propagation (SDK-specific concern, not needed for manual spans)
+- The `_EnrichingBatchSpanProcessor` enrichment pattern (SDK internal)

From 8fd8980d4782c3cbedeb586f9acb6767a6b51722 Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:31:13 -0600
Subject: [PATCH 02/11] docs: add implementation plan for manual A365 span
 instrumentation guide

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 ...-05-19-manual-a365-span-instrumentation.md | 1055 +++++++++++++++++
 1 file changed, 1055 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md

diff --git a/docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md b/docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md
new file mode 100644
index 00000000..55d15ad3
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md
@@ -0,0 +1,1055 @@
+# Manual A365 Span Instrumentation Guide — Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Create a comprehensive documentation guide (`docs/manual-a365-span-instrumentation.md`) that enables Python developers to manually add A365-compatible span attributes and export to the Agent 365 backend without depending on any `microsoft-agents-a365-*` package.
+
+**Architecture:** Single markdown document with tiered attribute tables, runnable Python code examples using only `opentelemetry-sdk` + `requests`, and full export protocol documentation. Each code snippet must be self-contained and copy-paste runnable.
+
+**Tech Stack:** Markdown, Python (opentelemetry-sdk, requests), Agent 365 observability backend HTTP API
+
+---
+
+## File Structure
+
+| File | Responsibility |
+|------|---------------|
+| `docs/manual-a365-span-instrumentation.md` | The complete guide (single deliverable) |
+| `docs/integrating-with-existing-opentelemetry.md` | Existing guide — add a cross-link to the new doc |
+
+---
+
+### Task 1: Scaffold the guide with intro and prerequisites
+
+**Files:**
+- Create: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Create the document with header, intro, and prerequisites**
+
+```markdown
+# Manual Agent 365 span instrumentation (without the SDK)
+
+This guide documents the **attribute contract** and **export protocol** for making your OpenTelemetry spans compatible with the Agent 365 observability backend — without importing any `microsoft-agents-a365-*` package.
+
+## When to use this guide
+
+Use this guide if you:
+
+- Have an existing Python application already instrumented with OpenTelemetry
+- Want your agent spans to appear in the Agent 365 portal
+- Prefer not to add the Agent 365 SDK as a dependency
+
+**When to use the SDK instead:** If you're starting fresh or can accept the dependency, the SDK (`microsoft-agents-a365-observability-core`) handles all of this automatically — attribute setting, span lifecycle, export, retries, and payload chunking. See [Integrating with existing OpenTelemetry](./integrating-with-existing-opentelemetry.md).
+
+## Prerequisites
+
+- Python 3.11+
+- `opentelemetry-sdk` (any recent version)
+- `requests` (for manual export to the A365 backend)
+- A registered Agent 365 agent (you'll need the `tenant_id` and `agent_id`)
+- A token resolver that can produce a Bearer token for the A365 ingestion endpoint
+
+Install dependencies:
+
+```bash
+pip install opentelemetry-sdk opentelemetry-api requests
+```
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: scaffold manual A365 span instrumentation guide"
+```
+
+---
+
+### Task 2: Write the attribute contract tables
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add the attribute contract section with all three span type tables**
+
+Append after the Prerequisites section:
+
+```markdown
+## Attribute contract
+
+The Agent 365 backend filters spans by `gen_ai.operation.name` and routes them by `microsoft.tenant.id` + `gen_ai.agent.id`. Spans missing required attributes are silently dropped.
+
+### Accepted `gen_ai.operation.name` values
+
+Only spans with one of these values pass the backend's ingest filter:
+
+| Value | Span type |
+|-------|-----------|
+| `invoke_agent` | Top-level agent invocation |
+| `Chat` | Inference (manual instrumentation convention) |
+| `chat` | Inference (OTel GenAI semconv / auto-instrumentation) |
+| `TextCompletion` | Inference (text completion) |
+| `GenerateContent` | Inference (content generation) |
+| `execute_tool` | Tool execution |
+
+### `invoke_agent` span
+
+The top-level span representing one user turn / agent invocation.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"invoke_agent"` | Must match exactly |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Used for routing |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Used for routing |
+| Recommended | `gen_ai.agent.name` | Human-readable agent name | Displayed in portal |
+| Recommended | `microsoft.session.id` | Session identifier | Groups turns in portal |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | Thread grouping |
+| Recommended | `microsoft.a365.agent.blueprint.id` | Blueprint GUID | Links to agent definition |
+| Recommended | `microsoft.a365.agent.platform.id` | Platform identifier | Identifies hosting platform |
+| Recommended | `user.id` | End-user identifier | Portal user analytics |
+| Recommended | `server.address` | Server hostname | |
+| Optional | `gen_ai.agent.description` | Agent description | |
+| Optional | `gen_ai.agent.version` | Agent version string | |
+| Optional | `microsoft.agent.user.id` | Agent's service identity | |
+| Optional | `microsoft.agent.user.email` | Agent's service email | |
+| Optional | `user.email` | End-user email | |
+| Optional | `user.name` | End-user display name | |
+| Optional | `client.address` | Client IP or hostname | |
+| Optional | `microsoft.channel.name` | Channel (e.g. `"Teams"`, `"Webchat"`) | |
+| Optional | `microsoft.channel.link` | Channel URL | |
+| Optional | `gen_ai.input.messages` | JSON-serialized input messages | Can be large; may be truncated |
+| Optional | `microsoft.a365.caller.agent.name` | Calling agent name | For agent-to-agent calls |
+| Optional | `microsoft.a365.caller.agent.id` | Calling agent GUID | For agent-to-agent calls |
+| Optional | `microsoft.a365.caller.agent.blueprint.id` | Calling agent blueprint | For agent-to-agent calls |
+
+### `inference` span (LLM call)
+
+Child of `invoke_agent`. One per LLM inference call.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"Chat"` or `"TextCompletion"` or `"GenerateContent"` | See accepted values above |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
+| **Required** | `gen_ai.request.model` | Model name (e.g. `"gpt-4o"`) | |
+| Recommended | `gen_ai.usage.input_tokens` | Integer | Token billing/monitoring |
+| Recommended | `gen_ai.usage.output_tokens` | Integer | Token billing/monitoring |
+| Recommended | `gen_ai.response.finish_reasons` | JSON array (e.g. `["stop"]`) | |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
+| Recommended | `gen_ai.provider.name` | `"openai"`, `"azure"`, etc. | |
+| Optional | `gen_ai.input.messages` | JSON-serialized input messages | |
+| Optional | `gen_ai.output.messages` | JSON-serialized output messages | |
+| Optional | `server.address` | LLM endpoint hostname | |
+| Optional | `server.port` | LLM endpoint port | Omit if 443 |
+| Optional | `microsoft.a365.agent.thought.process` | Agent reasoning trace | |
+
+### `execute_tool` span
+
+Child of `invoke_agent`. One per tool invocation.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"execute_tool"` | Must match exactly |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
+| **Required** | `gen_ai.tool.name` | Tool function name | |
+| Recommended | `gen_ai.tool.call.id` | Tool call ID from LLM response | |
+| Recommended | `gen_ai.tool.call.arguments` | JSON-serialized arguments | |
+| Recommended | `gen_ai.tool.call.result` | JSON-serialized result | Set after execution |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
+| Optional | `gen_ai.tool.type` | `"function"` | |
+| Optional | `gen_ai.tool.description` | Tool description | |
+| Optional | `server.address` | Server hostname | |
+
+### Resource attributes
+
+Set these on your `TracerProvider`'s `Resource`:
+
+| Tier | Attribute | Expected value |
+|------|-----------|----------------|
+| **Required** | `service.name` | Your service/agent name |
+| Recommended | `service.namespace` | Your service namespace |
+
+### SDK-identifying attributes (optional)
+
+Set these on every span to identify your telemetry source:
+
+| Attribute | Value |
+|-----------|-------|
+| `telemetry.sdk.name` | `"A365ObservabilitySDK"` (or your own identifier) |
+| `telemetry.sdk.language` | `"python"` |
+| `telemetry.sdk.version` | Your version string |
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add attribute contract tables to manual instrumentation guide"
+```
+
+---
+
+### Task 3: Write Example 1 — minimal invoke_agent span
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add the examples section header and Example 1**
+
+Append after the attribute contract section:
+
+```markdown
+## Examples
+
+### Example 1: Minimal `invoke_agent` span
+
+Creates a single root span with the required attributes (plus a few recommended ones) and exports it to the console for verification.
+
+```python
+import json
+import uuid
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+# --- Configuration (replace with your values) ---
+TENANT_ID = "your-tenant-guid"
+AGENT_ID = "your-agent-guid"
+AGENT_NAME = "my-weather-agent"
+
+# --- Set up OpenTelemetry with console export ---
+resource = Resource.create({"service.name": AGENT_NAME})
+provider = TracerProvider(resource=resource)
+provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+trace.set_tracer_provider(provider)
+
+tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
+
+# --- Create an invoke_agent span ---
+with tracer.start_as_current_span(
+    name=f"invoke_agent {AGENT_NAME}",
+    kind=trace.SpanKind.INTERNAL,
+) as span:
+    # Required attributes
+    span.set_attribute("gen_ai.operation.name", "invoke_agent")
+    span.set_attribute("microsoft.tenant.id", TENANT_ID)
+    span.set_attribute("gen_ai.agent.id", AGENT_ID)
+
+    # Recommended attributes
+    span.set_attribute("gen_ai.agent.name", AGENT_NAME)
+    span.set_attribute("microsoft.session.id", str(uuid.uuid4()))
+    span.set_attribute("gen_ai.conversation.id", str(uuid.uuid4()))
+
+    # ... your agent logic here ...
+    print("Agent invoked successfully")
+
+# Flush to ensure spans are exported
+provider.force_flush()
+```
+
+Run this and you should see a JSON span dump on stdout whose `attributes` object includes `"gen_ai.operation.name": "invoke_agent"`.
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add Example 1 (minimal invoke_agent span)"
+```
+
+---
+
+### Task 4: Write Example 2 — full agent turn with span hierarchy
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add Example 2 with all three span types**
+
+Append after Example 1:
+
+```markdown
+### Example 2: Full agent turn with span hierarchy
+
+Creates the proper parent-child relationship: `invoke_agent` → `inference` + `execute_tool`.
+
+```python
+import json
+import uuid
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+# --- Configuration ---
+TENANT_ID = "your-tenant-guid"
+AGENT_ID = "your-agent-guid"
+AGENT_NAME = "my-weather-agent"
+MODEL_NAME = "gpt-4o"
+PROVIDER_NAME = "azure"
+
+# --- OpenTelemetry setup ---
+resource = Resource.create({
+    "service.name": AGENT_NAME,
+    "service.namespace": "my-namespace",
+})
+provider = TracerProvider(resource=resource)
+provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+trace.set_tracer_provider(provider)
+
+tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
+
+# --- Simulate an agent turn ---
+session_id = str(uuid.uuid4())
+conversation_id = str(uuid.uuid4())
+user_message = "What's the weather in Seattle?"
+
+
+def get_weather(city: str) -> str:
+    """Simulated tool."""
+    return json.dumps({"city": city, "temp_f": 62, "condition": "cloudy"})
+
+
+# Top-level: invoke_agent
+with tracer.start_as_current_span(
+    name=f"invoke_agent {AGENT_NAME}",
+    kind=trace.SpanKind.INTERNAL,
+) as agent_span:
+    agent_span.set_attribute("gen_ai.operation.name", "invoke_agent")
+    agent_span.set_attribute("microsoft.tenant.id", TENANT_ID)
+    agent_span.set_attribute("gen_ai.agent.id", AGENT_ID)
+    agent_span.set_attribute("gen_ai.agent.name", AGENT_NAME)
+    agent_span.set_attribute("microsoft.session.id", session_id)
+    agent_span.set_attribute("gen_ai.conversation.id", conversation_id)
+    agent_span.set_attribute("user.id", "user-123")
+    agent_span.set_attribute("gen_ai.input.messages", json.dumps([
+        {"role": "user", "content": user_message}
+    ]))
+
+    # Child: inference (LLM call)
+    with tracer.start_as_current_span(
+        name=f"Chat {MODEL_NAME}",
+        kind=trace.SpanKind.INTERNAL,
+    ) as inference_span:
+        inference_span.set_attribute("gen_ai.operation.name", "Chat")
+        inference_span.set_attribute("microsoft.tenant.id", TENANT_ID)
+        inference_span.set_attribute("gen_ai.agent.id", AGENT_ID)
+        inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
+        inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
+        inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
+        inference_span.set_attribute("server.address", "my-resource.openai.azure.com")
+
+        # ... call your LLM here ...
+        # After response:
+        inference_span.set_attribute("gen_ai.usage.input_tokens", 42)
+        inference_span.set_attribute("gen_ai.usage.output_tokens", 15)
+        inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["tool_calls"]))
+
+    # Child: execute_tool
+    tool_call_id = "call_abc123"
+    tool_name = "get_weather"
+    tool_args = json.dumps({"city": "Seattle"})
+
+    with tracer.start_as_current_span(
+        name=f"execute_tool {tool_name}",
+        kind=trace.SpanKind.INTERNAL,
+    ) as tool_span:
+        tool_span.set_attribute("gen_ai.operation.name", "execute_tool")
+        tool_span.set_attribute("microsoft.tenant.id", TENANT_ID)
+        tool_span.set_attribute("gen_ai.agent.id", AGENT_ID)
+        tool_span.set_attribute("gen_ai.tool.name", tool_name)
+        tool_span.set_attribute("gen_ai.tool.call.id", tool_call_id)
+        tool_span.set_attribute("gen_ai.tool.call.arguments", tool_args)
+        tool_span.set_attribute("gen_ai.conversation.id", conversation_id)
+        tool_span.set_attribute("gen_ai.tool.type", "function")
+
+        # Execute the tool
+        result = get_weather("Seattle")
+        tool_span.set_attribute("gen_ai.tool.call.result", result)
+
+provider.force_flush()
+```
+
+You should see three spans in the console output: `invoke_agent my-weather-agent` (root), `Chat gpt-4o` (child), and `execute_tool get_weather` (child). Verify that `parent_id` on each child matches the root's `context.span_id` (the console exporter uses these field names, not the `parentSpanId`/`spanId` names of the A365 payload).
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add Example 2 (full agent turn with span hierarchy)"
+```
+
+---
+
+### Task 5: Write the export protocol section
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add the export protocol documentation**
+
+Append after Example 2:
+
+```markdown
+## Exporting to the Agent 365 backend
+
+The Agent 365 backend does **not** accept standard OTLP protobuf or OTLP/HTTP JSON. It uses a custom OTLP-like JSON format. This section documents the HTTP contract.
+
+### Endpoint
+
+```
+POST https://agent365.svc.cloud.microsoft/observability/tenants/{tenantId}/otlp/agents/{agentId}/traces?api-version=1
+```
+
+Replace `{tenantId}` and `{agentId}` with the values from your span attributes (`microsoft.tenant.id` and `gen_ai.agent.id`).
+
+### Authentication
+
+Every request requires a Bearer token:
+
+```
+Authorization: Bearer <token>
+Content-Type: application/json
+```
+
+The token is obtained from a **token resolver** — a function with signature:
+
+```python
+def resolve_token(agent_id: str, tenant_id: str) -> str:
+    """Return a valid Bearer token for the given agent and tenant."""
+    ...
+```
+
+How you implement this depends on your environment (MSAL client credentials, managed identity, etc.). The A365 SDK uses this same interface internally.
+
+### Payload format
+
+The body is JSON with this structure:
+
+```json
+{
+  "resourceSpans": [
+    {
+      "resource": {
+        "attributes": {
+          "service.name": "my-agent",
+          "service.namespace": "my-namespace"
+        }
+      },
+      "scopeSpans": [
+        {
+          "scope": {
+            "name": "my-agent-instrumentation",
+            "version": "1.0.0"
+          },
+          "spans": [
+            {
+              "traceId": "0af7651916cd43dd8448eb211c80319c",
+              "spanId": "b7ad6b7169203331",
+              "parentSpanId": null,
+              "name": "invoke_agent my-agent",
+              "kind": "INTERNAL",
+              "startTimeUnixNano": 1716000000000000000,
+              "endTimeUnixNano": 1716000001000000000,
+              "attributes": {
+                "gen_ai.operation.name": "invoke_agent",
+                "microsoft.tenant.id": "tenant-guid",
+                "gen_ai.agent.id": "agent-guid"
+              },
+              "events": null,
+              "links": null,
+              "status": {
+                "code": "OK",
+                "message": ""
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+
+### Field reference
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `traceId` | string | 32 hex chars (128-bit trace ID) |
+| `spanId` | string | 16 hex chars (64-bit span ID) |
+| `parentSpanId` | string \| null | Parent's spanId, or null for root |
+| `name` | string | Span name (see naming conventions below) |
+| `kind` | string | Span kind name: `"INTERNAL"`, `"CLIENT"`, `"SERVER"`, etc. |
+| `startTimeUnixNano` | integer | Start time in nanoseconds since Unix epoch |
+| `endTimeUnixNano` | integer | End time in nanoseconds since Unix epoch |
+| `attributes` | object \| null | Key-value map of span attributes |
+| `events` | array \| null | Span events (exceptions, logs) |
+| `links` | array \| null | Span links |
+| `status.code` | string | `"UNSET"`, `"OK"`, or `"ERROR"` |
+| `status.message` | string | Error description (empty for non-error) |
+
+### Span name conventions
+
+| Span type | Name format | Example |
+|-----------|-------------|---------|
+| invoke_agent | `"invoke_agent"` or `"invoke_agent <agent_name>"` | `"invoke_agent my-weather-agent"` |
+| inference | `"<operation> <model>"` | `"Chat gpt-4o"` |
+| execute_tool | `"execute_tool <tool_name>"` | `"execute_tool get_weather"` |
+
+### Constraints
+
+| Constraint | Value | Behavior |
+|------------|-------|----------|
+| Max payload size | ~900,000 bytes | Split spans across multiple POST requests |
+| Max individual span | 250,000 bytes | Largest attributes are replaced with `"TRUNCATED"` |
+| Retry on | 408, 429, 5xx | Exponential backoff; respect `Retry-After` header for 429 |
+| Fail on | Other 4xx | Non-retryable; check auth and payload format |
+| Timeout | 30 seconds | Per-request HTTP timeout |
+
+### Grouping requirement
+
+All spans in a single POST must share the same `microsoft.tenant.id` and `gen_ai.agent.id`. If your batch contains spans for multiple tenants or agents, partition them into separate requests.
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add export protocol section to manual instrumentation guide"
+```
+
+---
+
+### Task 6: Write Example 3 — DIY exporter
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add Example 3 with a custom SpanExporter implementation**
+
+Append after the export protocol section:
+
+```markdown
+### Example 3: Custom exporter for the Agent 365 backend
+
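+A minimal `SpanExporter` that builds the JSON envelope and POSTs to the A365 endpoint. This replaces the SDK's `_Agent365Exporter` without any A365 dependency. For brevity it does not implement the payload-size splitting or per-span truncation described under Constraints; add those before relying on it in production.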
+
+```python
+import json
+import logging
+import time
+from collections.abc import Sequence
+
+import requests
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from opentelemetry.trace import StatusCode
+
+logger = logging.getLogger(__name__)
+
+# Accepted operation names — spans with other values are filtered out
+ACCEPTED_OPERATIONS = frozenset({
+    "invoke_agent", "execute_tool", "chat", "Chat", "TextCompletion", "GenerateContent",
+})
+
+A365_ENDPOINT = "https://agent365.svc.cloud.microsoft"
+MAX_RETRIES = 3
+HTTP_TIMEOUT = 30.0
+
+
+class Agent365ManualExporter(SpanExporter):
+    """Minimal exporter that POSTs spans to the Agent 365 backend."""
+
+    def __init__(self, token_resolver):
+        """
+        Args:
+            token_resolver: Callable(agent_id, tenant_id) -> bearer_token string.
+        """
+        self._token_resolver = token_resolver
+        self._session = requests.Session()
+
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        # Partition by (tenant_id, agent_id)
+        groups = self._partition(spans)
+        if not groups:
+            return SpanExportResult.SUCCESS
+
+        any_failure = False
+        for (tenant_id, agent_id), group_spans in groups.items():
+            url = (
+                f"{A365_ENDPOINT}/observability/tenants/{tenant_id}"
+                f"/otlp/agents/{agent_id}/traces?api-version=1"
+            )
+            payload = self._build_payload(group_spans)
+            body = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
+
+            # Resolve auth token
+            try:
+                token = self._token_resolver(agent_id, tenant_id)
+            except Exception as e:
+                logger.error(f"Token resolution failed: {e}")
+                any_failure = True
+                continue
+
+            headers = {
+                "content-type": "application/json",
+                "authorization": f"Bearer {token}",
+            }
+
+            if not self._post_with_retries(url, body, headers):
+                any_failure = True
+
+        return SpanExportResult.FAILURE if any_failure else SpanExportResult.SUCCESS
+
+    def shutdown(self):
+        self._session.close()
+
+    def _partition(
+        self, spans: Sequence[ReadableSpan]
+    ) -> dict[tuple[str, str], list[ReadableSpan]]:
+        """Filter eligible spans and group by (tenant_id, agent_id)."""
+        groups: dict[tuple[str, str], list[ReadableSpan]] = {}
+        for sp in spans:
+            attrs = sp.attributes or {}
+            op_name = str(attrs.get("gen_ai.operation.name", ""))
+            if op_name not in ACCEPTED_OPERATIONS:
+                continue
+            tenant = str(attrs.get("microsoft.tenant.id", ""))
+            agent = str(attrs.get("gen_ai.agent.id", ""))
+            if not tenant or not agent:
+                continue
+            groups.setdefault((tenant, agent), []).append(sp)
+        return groups
+
+    def _build_payload(self, spans: Sequence[ReadableSpan]) -> dict:
+        """Build the OTLP-like JSON envelope."""
+        # Get resource attributes from the first span
+        resource_attrs = {}
+        if spans and spans[0].resource:
+            resource_attrs = dict(spans[0].resource.attributes)
+
+        # Group spans by instrumentation scope
+        scope_map: dict[tuple[str, str | None], list[dict]] = {}
+        for sp in spans:
+            scope = sp.instrumentation_scope
+            scope_name = scope.name if scope else "unknown"
+            scope_version = scope.version if scope else None
+            scope_map.setdefault((scope_name, scope_version), []).append(
+                self._map_span(sp)
+            )
+
+        scope_spans = [
+            {"scope": {"name": name, "version": version}, "spans": mapped}
+            for (name, version), mapped in scope_map.items()
+        ]
+
+        return {
+            "resourceSpans": [
+                {
+                    "resource": {"attributes": resource_attrs or None},
+                    "scopeSpans": scope_spans,
+                }
+            ]
+        }
+
+    @staticmethod
+    def _map_span(sp: ReadableSpan) -> dict:
+        """Convert a ReadableSpan to the A365 JSON format."""
+        ctx = sp.context
+        trace_id = f"{ctx.trace_id:032x}"
+        span_id = f"{ctx.span_id:016x}"
+        parent_span_id = None
+        if sp.parent and sp.parent.span_id:
+            parent_span_id = f"{sp.parent.span_id:016x}"
+
+        attrs = dict(sp.attributes or {})
+
+        # Map events
+        events = None
+        if sp.events:
+            events = [
+                {
+                    "timeUnixNano": ev.timestamp,
+                    "name": ev.name,
+                    "attributes": dict(ev.attributes) if ev.attributes else None,
+                }
+                for ev in sp.events
+            ]
+
+        # Map status
+        status_code = sp.status.status_code if sp.status else StatusCode.UNSET
+        status = {
+            "code": status_code.name,
+            "message": getattr(sp.status, "description", "") or "",
+        }
+
+        return {
+            "traceId": trace_id,
+            "spanId": span_id,
+            "parentSpanId": parent_span_id,
+            "name": sp.name,
+            "kind": sp.kind.name,
+            "startTimeUnixNano": sp.start_time,
+            "endTimeUnixNano": sp.end_time,
+            "attributes": attrs or None,
+            "events": events,
+            "links": None,
+            "status": status,
+        }
+
+    def _post_with_retries(self, url: str, body: str, headers: dict) -> bool:
+        """POST with exponential backoff on transient errors."""
+        for attempt in range(MAX_RETRIES + 1):
+            try:
+                resp = self._session.post(
+                    url, data=body, headers=headers, timeout=HTTP_TIMEOUT
+                )
+                if 200 <= resp.status_code < 300:
+                    return True
+                if resp.status_code in (408, 429) or resp.status_code >= 500:
+                    if attempt < MAX_RETRIES:
+                        # Respect Retry-After for 429
+                        retry_after = resp.headers.get("Retry-After")
+                        if retry_after and retry_after.isdigit():
+                            time.sleep(min(float(retry_after), 60.0))
+                        else:
+                            time.sleep(0.5 * (2 ** attempt))
+                        continue
+                logger.error(f"HTTP {resp.status_code}: {resp.text[:200]}")
+                return False
+            except requests.RequestException as e:
+                if attempt < MAX_RETRIES:
+                    time.sleep(0.5 * (2 ** attempt))
+                    continue
+                logger.error(f"Request failed after {MAX_RETRIES + 1} attempts: {e}")
+                return False
+        return False
+```
+
+**Usage:**
+
+```python
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+def my_token_resolver(agent_id: str, tenant_id: str) -> str:
+    # Your token acquisition logic here (MSAL, managed identity, etc.)
+    return "your-bearer-token"
+
+exporter = Agent365ManualExporter(token_resolver=my_token_resolver)
+provider.add_span_processor(BatchSpanProcessor(exporter))
+```
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add Example 3 (DIY Agent365 exporter)"
+```
+
+---
+
+### Task 7: Write the end-to-end example
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add the end-to-end example combining all pieces**
+
+Append after Example 3:
+
+```markdown
+### Example 4: End-to-end agent loop with A365 export
+
+Combines everything: proper span hierarchy, all recommended attributes, and export to the Agent 365 backend.
+
+```python
+"""
+Complete example: manually instrumented agent with A365 export.
+
+Requirements:
+    pip install opentelemetry-sdk opentelemetry-api requests openai
+
+Replace the placeholder values with your actual tenant ID, agent ID,
+and token resolver implementation.
+"""
+
+import json
+import uuid
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+# --- Configuration ---
+TENANT_ID = "your-tenant-guid"
+AGENT_ID = "your-agent-guid"
+AGENT_NAME = "my-weather-agent"
+SERVICE_NAMESPACE = "my-namespace"
+MODEL_NAME = "gpt-4o"
+PROVIDER_NAME = "azure"
+SERVER_ADDRESS = "my-resource.openai.azure.com"
+
+
+def my_token_resolver(agent_id: str, tenant_id: str) -> str:
+    """Replace with your actual token acquisition logic."""
+    raise NotImplementedError("Implement your token resolver")
+
+
+# --- OpenTelemetry setup ---
+resource = Resource.create({
+    "service.name": AGENT_NAME,
+    "service.namespace": SERVICE_NAMESPACE,
+})
+provider = TracerProvider(resource=resource)
+
+# For development: console export to verify spans locally
+provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+
+# For production: uncomment to export to Agent 365 backend
+# from agent365_exporter import Agent365ManualExporter  # Example 3 above
+# provider.add_span_processor(BatchSpanProcessor(
+#     Agent365ManualExporter(token_resolver=my_token_resolver)
+# ))
+
+trace.set_tracer_provider(provider)
+tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
+
+# --- Common attributes helper ---
+COMMON_ATTRS = {
+    "microsoft.tenant.id": TENANT_ID,
+    "gen_ai.agent.id": AGENT_ID,
+    "gen_ai.agent.name": AGENT_NAME,
+    "telemetry.sdk.name": "A365ObservabilitySDK",
+    "telemetry.sdk.language": "python",
+    "telemetry.sdk.version": "1.0.0",
+}
+
+
+def set_common_attrs(span):
+    for key, value in COMMON_ATTRS.items():
+        span.set_attribute(key, value)
+
+
+# --- Simulated tools ---
+def get_weather(city: str) -> str:
+    return json.dumps({"city": city, "temp_f": 62, "condition": "cloudy"})
+
+
+# --- Agent turn ---
+def handle_user_turn(user_message: str, user_id: str):
+    session_id = str(uuid.uuid4())
+    conversation_id = str(uuid.uuid4())
+
+    with tracer.start_as_current_span(
+        name=f"invoke_agent {AGENT_NAME}",
+        kind=trace.SpanKind.INTERNAL,
+    ) as agent_span:
+        set_common_attrs(agent_span)
+        agent_span.set_attribute("gen_ai.operation.name", "invoke_agent")
+        agent_span.set_attribute("microsoft.session.id", session_id)
+        agent_span.set_attribute("gen_ai.conversation.id", conversation_id)
+        agent_span.set_attribute("user.id", user_id)
+        agent_span.set_attribute("gen_ai.input.messages", json.dumps([
+            {"role": "user", "content": user_message}
+        ]))
+
+        # Step 1: Call the LLM
+        with tracer.start_as_current_span(
+            name=f"Chat {MODEL_NAME}",
+            kind=trace.SpanKind.INTERNAL,
+        ) as inference_span:
+            set_common_attrs(inference_span)
+            inference_span.set_attribute("gen_ai.operation.name", "Chat")
+            inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
+            inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
+            inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
+            inference_span.set_attribute("server.address", SERVER_ADDRESS)
+
+            # ... your LLM call here ...
+            # Simulate response with tool call
+            inference_span.set_attribute("gen_ai.usage.input_tokens", 55)
+            inference_span.set_attribute("gen_ai.usage.output_tokens", 22)
+            inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["tool_calls"]))
+
+        # Step 2: Execute the tool
+        tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
+        tool_name = "get_weather"
+        tool_args = json.dumps({"city": "Seattle"})
+
+        with tracer.start_as_current_span(
+            name=f"execute_tool {tool_name}",
+            kind=trace.SpanKind.INTERNAL,
+        ) as tool_span:
+            set_common_attrs(tool_span)
+            tool_span.set_attribute("gen_ai.operation.name", "execute_tool")
+            tool_span.set_attribute("gen_ai.tool.name", tool_name)
+            tool_span.set_attribute("gen_ai.tool.call.id", tool_call_id)
+            tool_span.set_attribute("gen_ai.tool.call.arguments", tool_args)
+            tool_span.set_attribute("gen_ai.conversation.id", conversation_id)
+            tool_span.set_attribute("gen_ai.tool.type", "function")
+
+            result = get_weather("Seattle")
+            tool_span.set_attribute("gen_ai.tool.call.result", result)
+
+        # Step 3: Final LLM call with tool result
+        with tracer.start_as_current_span(
+            name=f"Chat {MODEL_NAME}",
+            kind=trace.SpanKind.INTERNAL,
+        ) as final_inference_span:
+            set_common_attrs(final_inference_span)
+            final_inference_span.set_attribute("gen_ai.operation.name", "Chat")
+            final_inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
+            final_inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
+            final_inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
+            final_inference_span.set_attribute("server.address", SERVER_ADDRESS)
+
+            # ... your LLM call with tool result here ...
+            final_inference_span.set_attribute("gen_ai.usage.input_tokens", 85)
+            final_inference_span.set_attribute("gen_ai.usage.output_tokens", 45)
+            final_inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["stop"]))
+
+
+# --- Run ---
+if __name__ == "__main__":
+    handle_user_turn("What's the weather in Seattle?", user_id="user-456")
+    provider.force_flush()
+    print("Done — check console output for spans")
+```
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add Example 4 (end-to-end agent loop)"
+```
+
+---
+
+### Task 8: Write validation and troubleshooting section
+
+**Files:**
+- Modify: `docs/manual-a365-span-instrumentation.md`
+
+- [ ] **Step 1: Add validation and troubleshooting**
+
+Append at the end of the document:
+
+```markdown
+## Validation and troubleshooting
+
+### Verifying locally
+
+1. Use `ConsoleSpanExporter` (shown in the examples above) to dump spans to stdout
+2. Check that each span has:
+   - A `gen_ai.operation.name` from the [accepted values list](#accepted-gen_aioperation_name-values)
+   - Both `microsoft.tenant.id` and `gen_ai.agent.id` set to non-empty strings
+   - Correct parent-child relationships (each child's `parent_id` in the console output — `parentSpanId` in the A365 payload — matches the root span's span ID)
+
+### Verifying against the backend
+
+After switching to the `Agent365ManualExporter`:
+
+1. **HTTP 200–299** → spans accepted. They should appear in the Agent 365 portal within a few minutes.
+2. **HTTP 401/403** → token resolver returned an invalid or expired token. Check your auth implementation.
+3. **HTTP 400** → payload format is wrong. Validate your JSON against the [payload format](#payload-format) section.
+4. **HTTP 429** → rate limited. The exporter should respect `Retry-After` and retry automatically.
+5. **No response / timeout** → check network connectivity to `agent365.svc.cloud.microsoft`.
+
+### Common issues
+
+| Symptom | Cause | Fix |
+|---------|-------|-----|
+| Spans don't appear in portal | `gen_ai.operation.name` not in accepted list | Use one of the accepted values exactly: `"invoke_agent"`, `"Chat"`, `"chat"`, `"TextCompletion"`, `"GenerateContent"`, or `"execute_tool"` |
+| Spans silently dropped | Missing `microsoft.tenant.id` or `gen_ai.agent.id` | Ensure both are set on every span |
+| HTTP 400 from backend | Payload structure doesn't match expected format | Verify JSON envelope matches the documented structure |
+| HTTP 401 from backend | Token resolver returns wrong/expired token | Debug your token acquisition; ensure scope matches |
+| Only `invoke_agent` spans visible | Child spans missing required identity attrs | Set `microsoft.tenant.id` and `gen_ai.agent.id` on ALL spans, not just the root |
+| Large spans truncated | Span exceeds 250KB | Reduce `gen_ai.input.messages` / `gen_ai.output.messages` content |
+
+### Versioning note
+
+This document describes the Agent 365 backend contract as of May 2026. The payload format may evolve over time. The A365 SDK (`microsoft-agents-a365-observability-core`) handles format changes automatically and is the recommended path for production workloads that can accept the dependency.
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/manual-a365-span-instrumentation.md
+git commit -m "docs: add validation and troubleshooting section"
+```
+
+---
+
+### Task 9: Add cross-link from existing integration guide
+
+**Files:**
+- Modify: `docs/integrating-with-existing-opentelemetry.md` (add a callout near the top)
+
+- [ ] **Step 1: Add a cross-reference after the first paragraph**
+
+After line 3 (the intro paragraph ending with "...for the standalone setup."), add:
+
+```markdown
+
+> **Don't want the SDK dependency at all?** See [Manual Agent 365 span instrumentation](./manual-a365-span-instrumentation.md) for how to set the right attributes and export to the A365 backend using only `opentelemetry-sdk` + `requests`.
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add docs/integrating-with-existing-opentelemetry.md
+git commit -m "docs: cross-link to manual instrumentation guide"
+```
+
+---
+
+### Task 10: Final review and push
+
+**Files:**
+- Review: `docs/manual-a365-span-instrumentation.md` (full read-through)
+
+- [ ] **Step 1: Review the complete document for consistency**
+
+Read through `docs/manual-a365-span-instrumentation.md` end-to-end and verify:
+- All attribute keys in examples match the attribute tables exactly
+- All code snippets use consistent variable names (`TENANT_ID`, `AGENT_ID`, etc.)
+- No broken internal markdown links
+- Payload format example matches the exporter code structure
+
+- [ ] **Step 2: Run a quick markdown lint (if available)**
+
+```bash
+# Optional: check for markdown issues
+cat docs/manual-a365-span-instrumentation.md | head -5
+```
+
+- [ ] **Step 3: Push the branch**
+
+```bash
+git push origin docs/manual-a365-span-instrumentation
+```
+
+- [ ] **Step 4: Create PR**
+
+```bash
+gh pr create --title "docs: add manual A365 span instrumentation guide (without SDK)" \
+  --body "Adds documentation for teams that want A365 portal compatibility without the SDK dependency.
+
+Covers:
+- Tiered attribute contract (required/recommended/optional) for all three span types
+- Export protocol (endpoint, auth, payload format, constraints)
+- Complete runnable Python examples using only opentelemetry-sdk + requests
+- Custom SpanExporter implementation for the A365 backend
+- Validation and troubleshooting guide
+
+Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" \
+  --base main
+```

From 169f8d2e79daeb665d21a4f1454fd811bc4d406f Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:39:37 -0600
Subject: [PATCH 03/11] docs: scaffold manual A365 span instrumentation guide

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 27 ++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 docs/manual-a365-span-instrumentation.md

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
new file mode 100644
index 00000000..cbc145af
--- /dev/null
+++ b/docs/manual-a365-span-instrumentation.md
@@ -0,0 +1,27 @@
+# Manual Agent 365 span instrumentation (without the SDK)
+
+This guide documents the **attribute contract** and **export protocol** for making your OpenTelemetry spans compatible with the Agent 365 observability backend — without importing any `microsoft-agents-a365-*` package.
+
+## When to use this guide
+
+Use this guide if you:
+
+- Have an existing Python application already instrumented with OpenTelemetry
+- Want your agent spans to appear in the Agent 365 portal
+- Prefer not to add the Agent 365 SDK as a dependency
+
+**When to use the SDK instead:** If you're starting fresh or can accept the dependency, the SDK (`microsoft-agents-a365-observability-core`) handles all of this automatically — attribute setting, span lifecycle, export, retries, and payload chunking. See [Integrating with existing OpenTelemetry](./integrating-with-existing-opentelemetry.md).
+
+## Prerequisites
+
+- Python 3.11+
+- `opentelemetry-sdk` (any recent version)
+- `requests` (for manual export to the A365 backend)
+- A registered Agent 365 agent (you'll need the `tenant_id` and `agent_id`)
+- A token resolver that can produce a Bearer token for the A365 ingestion endpoint
+
+Install dependencies:
+
+```bash
+pip install opentelemetry-sdk opentelemetry-api requests
+```

From cfc1ae4dcb28693628b15bcf614bcde1011ce4fe Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:43:41 -0600
Subject: [PATCH 04/11] docs: add attribute contract tables to manual
 instrumentation guide

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 105 +++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
index cbc145af..c5901f60 100644
--- a/docs/manual-a365-span-instrumentation.md
+++ b/docs/manual-a365-span-instrumentation.md
@@ -25,3 +25,108 @@ Install dependencies:
 ```bash
 pip install opentelemetry-sdk opentelemetry-api requests
 ```
+
+## Attribute contract
+
+The Agent 365 backend filters spans by `gen_ai.operation.name` and routes them by `microsoft.tenant.id` + `gen_ai.agent.id`. Spans missing required attributes are silently dropped.
+
+### Accepted `gen_ai.operation.name` values
+
+Only spans with one of these values pass the backend's ingest filter:
+
+| Value | Span type |
+|-------|-----------|
+| `invoke_agent` | Top-level agent invocation |
+| `Chat` | Inference (manual instrumentation convention) |
+| `chat` | Inference (OTel GenAI semconv / auto-instrumentation) |
+| `TextCompletion` | Inference (text completion) |
+| `GenerateContent` | Inference (content generation) |
+| `execute_tool` | Tool execution |
+
+### `invoke_agent` span
+
+The top-level span representing one user turn / agent invocation.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"invoke_agent"` | Must match exactly |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Used for routing |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Used for routing |
+| Recommended | `gen_ai.agent.name` | Human-readable agent name | Displayed in portal |
+| Recommended | `microsoft.session.id` | Session identifier | Groups turns in portal |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | Thread grouping |
+| Recommended | `microsoft.a365.agent.blueprint.id` | Blueprint GUID | Links to agent definition |
+| Recommended | `microsoft.a365.agent.platform.id` | Platform identifier | Identifies hosting platform |
+| Recommended | `user.id` | End-user identifier | Portal user analytics |
+| Recommended | `server.address` | Server hostname | |
+| Optional | `gen_ai.agent.description` | Agent description | |
+| Optional | `gen_ai.agent.version` | Agent version string | |
+| Optional | `microsoft.agent.user.id` | Agent's service identity | |
+| Optional | `microsoft.agent.user.email` | Agent's service email | |
+| Optional | `user.email` | End-user email | |
+| Optional | `user.name` | End-user display name | |
+| Optional | `client.address` | Client IP or hostname | |
+| Optional | `microsoft.channel.name` | Channel (e.g. `"Teams"`, `"Webchat"`) | |
+| Optional | `microsoft.channel.link` | Channel URL | |
+| Optional | `gen_ai.input.messages` | JSON-serialized input messages | Can be large; may be truncated |
+| Optional | `microsoft.a365.caller.agent.name` | Calling agent name | For agent-to-agent calls |
+| Optional | `microsoft.a365.caller.agent.id` | Calling agent GUID | For agent-to-agent calls |
+| Optional | `microsoft.a365.caller.agent.blueprint.id` | Calling agent blueprint | For agent-to-agent calls |
+
+### `inference` span (LLM call)
+
+Child of `invoke_agent`. One per LLM inference call.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"Chat"`, `"chat"`, `"TextCompletion"`, or `"GenerateContent"` | See accepted values above |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
+| **Required** | `gen_ai.request.model` | Model name (e.g. `"gpt-4o"`) | |
+| Recommended | `gen_ai.usage.input_tokens` | Integer | Token billing/monitoring |
+| Recommended | `gen_ai.usage.output_tokens` | Integer | Token billing/monitoring |
+| Recommended | `gen_ai.response.finish_reasons` | JSON array (e.g. `["stop"]`) | |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
+| Recommended | `gen_ai.provider.name` | `"openai"`, `"azure"`, etc. | |
+| Optional | `gen_ai.input.messages` | JSON-serialized input messages | |
+| Optional | `gen_ai.output.messages` | JSON-serialized output messages | |
+| Optional | `server.address` | LLM endpoint hostname | |
+| Optional | `server.port` | LLM endpoint port | Omit if 443 |
+| Optional | `microsoft.a365.agent.thought.process` | Agent reasoning trace | |
+
+### `execute_tool` span
+
+Child of `invoke_agent`. One per tool invocation.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"execute_tool"` | Must match exactly |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
+| **Required** | `gen_ai.tool.name` | Tool function name | |
+| Recommended | `gen_ai.tool.call.id` | Tool call ID from LLM response | |
+| Recommended | `gen_ai.tool.call.arguments` | JSON-serialized arguments | |
+| Recommended | `gen_ai.tool.call.result` | JSON-serialized result | Set after execution |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
+| Optional | `gen_ai.tool.type` | `"function"` | |
+| Optional | `gen_ai.tool.description` | Tool description | |
+| Optional | `server.address` | Server hostname | |
+
+### Resource attributes
+
+Set these on your `TracerProvider`'s `Resource`:
+
+| Tier | Attribute | Expected value |
+|------|-----------|----------------|
+| **Required** | `service.name` | Your service/agent name |
+| Recommended | `service.namespace` | Your service namespace |
+
+### SDK-identifying attributes (optional)
+
+Set these on every span to identify your telemetry source:
+
+| Attribute | Value |
+|-----------|-------|
+| `telemetry.sdk.name` | `"A365ObservabilitySDK"` (or your own identifier) |
+| `telemetry.sdk.language` | `"python"` |
+| `telemetry.sdk.version` | Your version string |

From 719868eb0a2528500365b1573ee7858c7c94e7e6 Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:46:10 -0600
Subject: [PATCH 05/11] docs: add Examples 1 and 2 (minimal span + full
 hierarchy)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 156 +++++++++++++++++++++++
 1 file changed, 156 insertions(+)

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
index c5901f60..aa086d13 100644
--- a/docs/manual-a365-span-instrumentation.md
+++ b/docs/manual-a365-span-instrumentation.md
@@ -130,3 +130,159 @@ Set these on every span to identify your telemetry source:
 | `telemetry.sdk.name` | `"A365ObservabilitySDK"` (or your own identifier) |
 | `telemetry.sdk.language` | `"python"` |
 | `telemetry.sdk.version` | Your version string |
+
+## Examples
+
+### Example 1: Minimal `invoke_agent` span
+
+Creates a single root span with the required attributes (plus a few recommended ones) and exports it to the console for verification.
+
+```python
+import json
+import uuid
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+# --- Configuration (replace with your values) ---
+TENANT_ID = "your-tenant-guid"
+AGENT_ID = "your-agent-guid"
+AGENT_NAME = "my-weather-agent"
+
+# --- Set up OpenTelemetry with console export ---
+resource = Resource.create({"service.name": AGENT_NAME})
+provider = TracerProvider(resource=resource)
+provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+trace.set_tracer_provider(provider)
+
+tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
+
+# --- Create an invoke_agent span ---
+with tracer.start_as_current_span(
+    name=f"invoke_agent {AGENT_NAME}",
+    kind=trace.SpanKind.INTERNAL,
+) as span:
+    # Required attributes
+    span.set_attribute("gen_ai.operation.name", "invoke_agent")
+    span.set_attribute("microsoft.tenant.id", TENANT_ID)
+    span.set_attribute("gen_ai.agent.id", AGENT_ID)
+
+    # Recommended attributes
+    span.set_attribute("gen_ai.agent.name", AGENT_NAME)
+    span.set_attribute("microsoft.session.id", str(uuid.uuid4()))
+    span.set_attribute("gen_ai.conversation.id", str(uuid.uuid4()))
+
+    # ... your agent logic here ...
+    print("Agent invoked successfully")
+
+# Flush to ensure spans are exported
+provider.force_flush()
+```
+
+Run this and you should see a JSON span dump on stdout whose `attributes` include `"gen_ai.operation.name": "invoke_agent"`.
+
+### Example 2: Full agent turn with span hierarchy
+
+Creates the proper parent-child relationship: `invoke_agent` → `inference` + `execute_tool`.
+
+```python
+import json
+import uuid
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+# --- Configuration ---
+TENANT_ID = "your-tenant-guid"
+AGENT_ID = "your-agent-guid"
+AGENT_NAME = "my-weather-agent"
+MODEL_NAME = "gpt-4o"
+PROVIDER_NAME = "azure"
+
+# --- OpenTelemetry setup ---
+resource = Resource.create({
+    "service.name": AGENT_NAME,
+    "service.namespace": "my-namespace",
+})
+provider = TracerProvider(resource=resource)
+provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+trace.set_tracer_provider(provider)
+
+tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
+
+# --- Simulate an agent turn ---
+session_id = str(uuid.uuid4())
+conversation_id = str(uuid.uuid4())
+user_message = "What's the weather in Seattle?"
+
+
+def get_weather(city: str) -> str:
+    """Simulated tool."""
+    return json.dumps({"city": city, "temp_f": 62, "condition": "cloudy"})
+
+
+# Top-level: invoke_agent
+with tracer.start_as_current_span(
+    name=f"invoke_agent {AGENT_NAME}",
+    kind=trace.SpanKind.INTERNAL,
+) as agent_span:
+    agent_span.set_attribute("gen_ai.operation.name", "invoke_agent")
+    agent_span.set_attribute("microsoft.tenant.id", TENANT_ID)
+    agent_span.set_attribute("gen_ai.agent.id", AGENT_ID)
+    agent_span.set_attribute("gen_ai.agent.name", AGENT_NAME)
+    agent_span.set_attribute("microsoft.session.id", session_id)
+    agent_span.set_attribute("gen_ai.conversation.id", conversation_id)
+    agent_span.set_attribute("user.id", "user-123")
+    agent_span.set_attribute("gen_ai.input.messages", json.dumps([
+        {"role": "user", "content": user_message}
+    ]))
+
+    # Child: inference (LLM call)
+    with tracer.start_as_current_span(
+        name=f"Chat {MODEL_NAME}",
+        kind=trace.SpanKind.INTERNAL,
+    ) as inference_span:
+        inference_span.set_attribute("gen_ai.operation.name", "Chat")
+        inference_span.set_attribute("microsoft.tenant.id", TENANT_ID)
+        inference_span.set_attribute("gen_ai.agent.id", AGENT_ID)
+        inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
+        inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
+        inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
+        inference_span.set_attribute("server.address", "my-resource.openai.azure.com")
+
+        # ... call your LLM here ...
+        # After response:
+        inference_span.set_attribute("gen_ai.usage.input_tokens", 42)
+        inference_span.set_attribute("gen_ai.usage.output_tokens", 15)
+        inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["tool_calls"]))
+
+    # Child: execute_tool
+    tool_call_id = "call_abc123"
+    tool_name = "get_weather"
+    tool_args = json.dumps({"city": "Seattle"})
+
+    with tracer.start_as_current_span(
+        name=f"execute_tool {tool_name}",
+        kind=trace.SpanKind.INTERNAL,
+    ) as tool_span:
+        tool_span.set_attribute("gen_ai.operation.name", "execute_tool")
+        tool_span.set_attribute("microsoft.tenant.id", TENANT_ID)
+        tool_span.set_attribute("gen_ai.agent.id", AGENT_ID)
+        tool_span.set_attribute("gen_ai.tool.name", tool_name)
+        tool_span.set_attribute("gen_ai.tool.call.id", tool_call_id)
+        tool_span.set_attribute("gen_ai.tool.call.arguments", tool_args)
+        tool_span.set_attribute("gen_ai.conversation.id", conversation_id)
+        tool_span.set_attribute("gen_ai.tool.type", "function")
+
+        # Execute the tool
+        result = get_weather("Seattle")
+        tool_span.set_attribute("gen_ai.tool.call.result", result)
+
+provider.force_flush()
+```
+
+You should see three spans in the console output: `invoke_agent my-weather-agent` (root), `Chat gpt-4o` (child), and `execute_tool get_weather` (child). Verify that `parentSpanId` on the children matches the root's `spanId`.

From 9b4c7d9021f87dc5d3d53565fe5eb55628ab04db Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:47:19 -0600
Subject: [PATCH 06/11] docs: add export protocol section (endpoint, auth,
 payload, constraints)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 119 +++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
index aa086d13..fd76663c 100644
--- a/docs/manual-a365-span-instrumentation.md
+++ b/docs/manual-a365-span-instrumentation.md
@@ -286,3 +286,122 @@ provider.force_flush()
 ```
 
 You should see three spans in the console output: `invoke_agent my-weather-agent` (root), `Chat gpt-4o` (child), and `execute_tool get_weather` (child). Verify that `parentSpanId` on the children matches the root's `spanId`.
+
+## Exporting to the Agent 365 backend
+
+The Agent 365 backend does **not** accept standard OTLP protobuf or OTLP/HTTP JSON. It uses a custom OTLP-like JSON format. This section documents the HTTP contract.
+
+### Endpoint
+
+```
+POST https://agent365.svc.cloud.microsoft/observability/tenants/{tenantId}/otlp/agents/{agentId}/traces?api-version=1
+```
+
+Replace `{tenantId}` and `{agentId}` with the values from your span attributes (`microsoft.tenant.id` and `gen_ai.agent.id`).
+
+### Authentication
+
+Every request requires a Bearer token:
+
+```
+Authorization: Bearer <token>
+Content-Type: application/json
+```
+
+The token is obtained from a **token resolver** — a function with signature:
+
+```python
+def resolve_token(agent_id: str, tenant_id: str) -> str:
+    """Return a valid Bearer token for the given agent and tenant."""
+    ...
+```
+
+How you implement this depends on your environment (MSAL client credentials, managed identity, etc.). The A365 SDK uses this same interface internally.
+
+### Payload format
+
+The body is JSON with this structure:
+
+```json
+{
+  "resourceSpans": [
+    {
+      "resource": {
+        "attributes": {
+          "service.name": "my-agent",
+          "service.namespace": "my-namespace"
+        }
+      },
+      "scopeSpans": [
+        {
+          "scope": {
+            "name": "my-agent-instrumentation",
+            "version": "1.0.0"
+          },
+          "spans": [
+            {
+              "traceId": "0af7651916cd43dd8448eb211c80319c",
+              "spanId": "b7ad6b7169203331",
+              "parentSpanId": null,
+              "name": "invoke_agent my-agent",
+              "kind": "INTERNAL",
+              "startTimeUnixNano": 1716000000000000000,
+              "endTimeUnixNano": 1716000001000000000,
+              "attributes": {
+                "gen_ai.operation.name": "invoke_agent",
+                "microsoft.tenant.id": "tenant-guid",
+                "gen_ai.agent.id": "agent-guid"
+              },
+              "events": null,
+              "links": null,
+              "status": {
+                "code": "OK",
+                "message": ""
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+
+### Field reference
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `traceId` | string | 32 hex chars (128-bit trace ID) |
+| `spanId` | string | 16 hex chars (64-bit span ID) |
+| `parentSpanId` | string \| null | Parent's spanId, or null for root |
+| `name` | string | Span name (see naming conventions below) |
+| `kind` | string | Span kind name: `"INTERNAL"`, `"CLIENT"`, `"SERVER"`, etc. |
+| `startTimeUnixNano` | integer | Start time in nanoseconds since Unix epoch |
+| `endTimeUnixNano` | integer | End time in nanoseconds since Unix epoch |
+| `attributes` | object \| null | Key-value map of span attributes |
+| `events` | array \| null | Span events (exceptions, logs) |
+| `links` | array \| null | Span links |
+| `status.code` | string | `"UNSET"`, `"OK"`, or `"ERROR"` |
+| `status.message` | string | Error description (empty for non-error) |
+
+### Span name conventions
+
+| Span type | Name format | Example |
+|-----------|-------------|---------|
+| invoke_agent | `"invoke_agent"` or `"invoke_agent <agent_name>"` | `"invoke_agent my-weather-agent"` |
+| inference | `"<operation> <model>"` | `"Chat gpt-4o"` |
+| execute_tool | `"execute_tool <tool_name>"` | `"execute_tool get_weather"` |
+
+### Constraints
+
+| Constraint | Value | Behavior |
+|------------|-------|----------|
+| Max payload size | ~900,000 bytes | Split spans across multiple POST requests |
+| Max individual span | 250,000 bytes | Largest attributes are replaced with `"TRUNCATED"` |
+| Retry on | 408, 429, 5xx | Exponential backoff; respect `Retry-After` header for 429 |
+| Fail on | Other 4xx | Non-retryable; check auth and payload format |
+| Timeout | 30 seconds | Per-request HTTP timeout |
+
+### Grouping requirement
+
+All spans in a single POST must share the same `microsoft.tenant.id` and `gen_ai.agent.id`. If your batch contains spans for multiple tenants or agents, partition them into separate requests.

From 13b2c0db8e01f3e34c3868b78bff13e96dea2a96 Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:48:39 -0600
Subject: [PATCH 07/11] docs: add Example 3 (DIY Agent365 exporter)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 209 +++++++++++++++++++++++
 1 file changed, 209 insertions(+)

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
index fd76663c..6d9215de 100644
--- a/docs/manual-a365-span-instrumentation.md
+++ b/docs/manual-a365-span-instrumentation.md
@@ -405,3 +405,212 @@ The body is JSON with this structure:
 ### Grouping requirement
 
 All spans in a single POST must share the same `microsoft.tenant.id` and `gen_ai.agent.id`. If your batch contains spans for multiple tenants or agents, partition them into separate requests.
+
+### Example 3: Custom exporter for the Agent 365 backend
+
+A minimal `SpanExporter` that builds the JSON envelope and POSTs to the A365 endpoint. This replaces the SDK's internal exporter without any A365 dependency.
+
+```python
+import json
+import logging
+import time
+from collections.abc import Sequence
+
+import requests
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from opentelemetry.trace import StatusCode
+
+logger = logging.getLogger(__name__)
+
+# Accepted operation names — spans with other values are filtered out
+ACCEPTED_OPERATIONS = frozenset({
+    "invoke_agent", "execute_tool", "chat", "Chat", "TextCompletion", "GenerateContent",
+})
+
+A365_ENDPOINT = "https://agent365.svc.cloud.microsoft"
+MAX_RETRIES = 3
+HTTP_TIMEOUT = 30.0
+
+
+class Agent365ManualExporter(SpanExporter):
+    """Minimal exporter that POSTs spans to the Agent 365 backend."""
+
+    def __init__(self, token_resolver):
+        """
+        Args:
+            token_resolver: Callable(agent_id, tenant_id) -> bearer_token string.
+        """
+        self._token_resolver = token_resolver
+        self._session = requests.Session()
+
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        # Partition by (tenant_id, agent_id)
+        groups = self._partition(spans)
+        if not groups:
+            return SpanExportResult.SUCCESS
+
+        any_failure = False
+        for (tenant_id, agent_id), group_spans in groups.items():
+            url = (
+                f"{A365_ENDPOINT}/observability/tenants/{tenant_id}"
+                f"/otlp/agents/{agent_id}/traces?api-version=1"
+            )
+            payload = self._build_payload(group_spans)
+            body = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
+
+            # Resolve auth token
+            try:
+                token = self._token_resolver(agent_id, tenant_id)
+            except Exception as e:
+                logger.error(f"Token resolution failed: {e}")
+                any_failure = True
+                continue
+
+            headers = {
+                "content-type": "application/json",
+                "authorization": f"Bearer {token}",
+            }
+
+            if not self._post_with_retries(url, body, headers):
+                any_failure = True
+
+        return SpanExportResult.FAILURE if any_failure else SpanExportResult.SUCCESS
+
+    def shutdown(self):
+        self._session.close()
+
+    def _partition(
+        self, spans: Sequence[ReadableSpan]
+    ) -> dict[tuple[str, str], list[ReadableSpan]]:
+        """Filter eligible spans and group by (tenant_id, agent_id)."""
+        groups: dict[tuple[str, str], list[ReadableSpan]] = {}
+        for sp in spans:
+            attrs = sp.attributes or {}
+            op_name = str(attrs.get("gen_ai.operation.name", ""))
+            if op_name not in ACCEPTED_OPERATIONS:
+                continue
+            tenant = str(attrs.get("microsoft.tenant.id", ""))
+            agent = str(attrs.get("gen_ai.agent.id", ""))
+            if not tenant or not agent:
+                continue
+            groups.setdefault((tenant, agent), []).append(sp)
+        return groups
+
+    def _build_payload(self, spans: Sequence[ReadableSpan]) -> dict:
+        """Build the OTLP-like JSON envelope."""
+        # Get resource attributes from the first span
+        resource_attrs = {}
+        if spans and spans[0].resource:
+            resource_attrs = dict(spans[0].resource.attributes)
+
+        # Group spans by instrumentation scope
+        scope_map: dict[tuple[str, str | None], list[dict]] = {}
+        for sp in spans:
+            scope = sp.instrumentation_scope
+            scope_name = scope.name if scope else "unknown"
+            scope_version = scope.version if scope else None
+            scope_map.setdefault((scope_name, scope_version), []).append(
+                self._map_span(sp)
+            )
+
+        scope_spans = [
+            {"scope": {"name": name, "version": version}, "spans": mapped}
+            for (name, version), mapped in scope_map.items()
+        ]
+
+        return {
+            "resourceSpans": [
+                {
+                    "resource": {"attributes": resource_attrs or None},
+                    "scopeSpans": scope_spans,
+                }
+            ]
+        }
+
+    @staticmethod
+    def _map_span(sp: ReadableSpan) -> dict:
+        """Convert a ReadableSpan to the A365 JSON format."""
+        ctx = sp.context
+        trace_id = f"{ctx.trace_id:032x}"
+        span_id = f"{ctx.span_id:016x}"
+        parent_span_id = None
+        if sp.parent and sp.parent.span_id:
+            parent_span_id = f"{sp.parent.span_id:016x}"
+
+        attrs = dict(sp.attributes or {})
+
+        # Map events
+        events = None
+        if sp.events:
+            events = [
+                {
+                    "timeUnixNano": ev.timestamp,
+                    "name": ev.name,
+                    "attributes": dict(ev.attributes) if ev.attributes else None,
+                }
+                for ev in sp.events
+            ]
+
+        # Map status
+        status_code = sp.status.status_code if sp.status else StatusCode.UNSET
+        status = {
+            "code": status_code.name,
+            "message": getattr(sp.status, "description", "") or "",
+        }
+
+        return {
+            "traceId": trace_id,
+            "spanId": span_id,
+            "parentSpanId": parent_span_id,
+            "name": sp.name,
+            "kind": sp.kind.name,
+            "startTimeUnixNano": sp.start_time,
+            "endTimeUnixNano": sp.end_time,
+            "attributes": attrs or None,
+            "events": events,
+            "links": None,
+            "status": status,
+        }
+
+    def _post_with_retries(self, url: str, body: str, headers: dict) -> bool:
+        """POST with exponential backoff on transient errors."""
+        for attempt in range(MAX_RETRIES + 1):
+            try:
+                resp = self._session.post(
+                    url, data=body, headers=headers, timeout=HTTP_TIMEOUT
+                )
+                if 200 <= resp.status_code < 300:
+                    return True
+                if resp.status_code in (408, 429) or resp.status_code >= 500:
+                    if attempt < MAX_RETRIES:
+                        # Honor Retry-After (whole seconds, capped at 60 s) when the server sends it
+                        retry_after = resp.headers.get("Retry-After")
+                        if retry_after and retry_after.isdigit():
+                            time.sleep(min(float(retry_after), 60.0))
+                        else:
+                            time.sleep(0.5 * (2 ** attempt))
+                        continue
+                logger.error(f"HTTP {resp.status_code}: {resp.text[:200]}")
+                return False
+            except requests.RequestException as e:
+                if attempt < MAX_RETRIES:
+                    time.sleep(0.5 * (2 ** attempt))
+                    continue
+                logger.error(f"Request failed after {MAX_RETRIES + 1} attempts: {e}")
+                return False
+        return False
+```
+
+**Usage:**
+
+```python
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+def my_token_resolver(agent_id: str, tenant_id: str) -> str:
+    # Your token acquisition logic here (MSAL, managed identity, etc.)
+    return "your-bearer-token"
+
+exporter = Agent365ManualExporter(token_resolver=my_token_resolver)
+provider.add_span_processor(BatchSpanProcessor(exporter))
+```

From 7d2854220ebaeb1013cd45204358d0236bb6a618 Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:50:25 -0600
Subject: [PATCH 08/11] docs: add Example 4 (end-to-end) and
 validation/troubleshooting

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 194 +++++++++++++++++++++++
 1 file changed, 194 insertions(+)

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
index 6d9215de..0c6ceddf 100644
--- a/docs/manual-a365-span-instrumentation.md
+++ b/docs/manual-a365-span-instrumentation.md
@@ -614,3 +614,197 @@ def my_token_resolver(agent_id: str, tenant_id: str) -> str:
 exporter = Agent365ManualExporter(token_resolver=my_token_resolver)
 provider.add_span_processor(BatchSpanProcessor(exporter))
 ```
+
+### Example 4: End-to-end agent loop with A365 export
+
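+Combines everything: proper span hierarchy, all recommended attributes, and export to the Agent 365 backend (console export is enabled by default; the A365 exporter lines are commented out until you supply a real token resolver).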
+
+```python
+"""
+Complete example: manually instrumented agent with A365 export.
+
+Requirements:
+    pip install opentelemetry-sdk opentelemetry-api requests
+
+Replace the placeholder values with your actual tenant ID, agent ID,
+and token resolver implementation.
+"""
+
+import json
+import uuid
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+# --- Configuration ---
+TENANT_ID = "your-tenant-guid"
+AGENT_ID = "your-agent-guid"
+AGENT_NAME = "my-weather-agent"
+SERVICE_NAMESPACE = "my-namespace"
+MODEL_NAME = "gpt-4o"
+PROVIDER_NAME = "azure"
+SERVER_ADDRESS = "my-resource.openai.azure.com"
+
+
+def my_token_resolver(agent_id: str, tenant_id: str) -> str:
+    """Replace with your actual token acquisition logic."""
+    raise NotImplementedError("Implement your token resolver")
+
+
+# --- OpenTelemetry setup ---
+resource = Resource.create({
+    "service.name": AGENT_NAME,
+    "service.namespace": SERVICE_NAMESPACE,
+})
+provider = TracerProvider(resource=resource)
+
+# For development: console export to verify spans locally
+provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+
+# For production: uncomment to export to Agent 365 backend
+# from agent365_exporter import Agent365ManualExporter  # Example 3, saved as agent365_exporter.py
+# provider.add_span_processor(BatchSpanProcessor(
+#     Agent365ManualExporter(token_resolver=my_token_resolver)
+# ))
+
+trace.set_tracer_provider(provider)
+tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
+
+# --- Common attributes helper ---
+COMMON_ATTRS = {
+    "microsoft.tenant.id": TENANT_ID,
+    "gen_ai.agent.id": AGENT_ID,
+    "gen_ai.agent.name": AGENT_NAME,
+    "telemetry.sdk.name": "A365ObservabilitySDK",
+    "telemetry.sdk.language": "python",
+    "telemetry.sdk.version": "1.0.0",
+}
+
+
+def set_common_attrs(span):
+    for key, value in COMMON_ATTRS.items():
+        span.set_attribute(key, value)
+
+
+# --- Simulated tools ---
+def get_weather(city: str) -> str:
+    return json.dumps({"city": city, "temp_f": 62, "condition": "cloudy"})
+
+
+# --- Agent turn ---
+def handle_user_turn(user_message: str, user_id: str):
+    session_id = str(uuid.uuid4())
+    conversation_id = str(uuid.uuid4())
+
+    with tracer.start_as_current_span(
+        name=f"invoke_agent {AGENT_NAME}",
+        kind=trace.SpanKind.INTERNAL,
+    ) as agent_span:
+        set_common_attrs(agent_span)
+        agent_span.set_attribute("gen_ai.operation.name", "invoke_agent")
+        agent_span.set_attribute("microsoft.session.id", session_id)
+        agent_span.set_attribute("gen_ai.conversation.id", conversation_id)
+        agent_span.set_attribute("user.id", user_id)
+        agent_span.set_attribute("gen_ai.input.messages", json.dumps([
+            {"role": "user", "content": user_message}
+        ]))
+
+        # Step 1: Call the LLM
+        with tracer.start_as_current_span(
+            name=f"Chat {MODEL_NAME}",
+            kind=trace.SpanKind.INTERNAL,
+        ) as inference_span:
+            set_common_attrs(inference_span)
+            inference_span.set_attribute("gen_ai.operation.name", "Chat")
+            inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
+            inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
+            inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
+            inference_span.set_attribute("server.address", SERVER_ADDRESS)
+
+            # ... your LLM call here ...
+            # Simulate response with tool call
+            inference_span.set_attribute("gen_ai.usage.input_tokens", 55)
+            inference_span.set_attribute("gen_ai.usage.output_tokens", 22)
+            inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["tool_calls"]))
+
+        # Step 2: Execute the tool
+        tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
+        tool_name = "get_weather"
+        tool_args = json.dumps({"city": "Seattle"})
+
+        with tracer.start_as_current_span(
+            name=f"execute_tool {tool_name}",
+            kind=trace.SpanKind.INTERNAL,
+        ) as tool_span:
+            set_common_attrs(tool_span)
+            tool_span.set_attribute("gen_ai.operation.name", "execute_tool")
+            tool_span.set_attribute("gen_ai.tool.name", tool_name)
+            tool_span.set_attribute("gen_ai.tool.call.id", tool_call_id)
+            tool_span.set_attribute("gen_ai.tool.call.arguments", tool_args)
+            tool_span.set_attribute("gen_ai.conversation.id", conversation_id)
+            tool_span.set_attribute("gen_ai.tool.type", "function")
+
+            result = get_weather("Seattle")
+            tool_span.set_attribute("gen_ai.tool.call.result", result)
+
+        # Step 3: Final LLM call with tool result
+        with tracer.start_as_current_span(
+            name=f"Chat {MODEL_NAME}",
+            kind=trace.SpanKind.INTERNAL,
+        ) as final_inference_span:
+            set_common_attrs(final_inference_span)
+            final_inference_span.set_attribute("gen_ai.operation.name", "Chat")
+            final_inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
+            final_inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
+            final_inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
+            final_inference_span.set_attribute("server.address", SERVER_ADDRESS)
+
+            # ... your LLM call with tool result here ...
+            final_inference_span.set_attribute("gen_ai.usage.input_tokens", 85)
+            final_inference_span.set_attribute("gen_ai.usage.output_tokens", 45)
+            final_inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["stop"]))
+
+
+# --- Run ---
+if __name__ == "__main__":
+    handle_user_turn("What's the weather in Seattle?", user_id="user-456")
+    provider.force_flush()
+    print("Done — check console output for spans")
+```
+
+## Validation and troubleshooting
+
+### Verifying locally
+
+1. Use `ConsoleSpanExporter` (shown in the examples above) to dump spans to stdout
+2. Check that each span has:
+   - A `gen_ai.operation.name` from the [accepted values list](#accepted-gen_aioperationname-values)
+   - Both `microsoft.tenant.id` and `gen_ai.agent.id` set to non-empty strings
+   - Correct parent-child relationships (in the console output, `parent_id` on each child matches its parent's `context.span_id`)
+
+### Verifying against the backend
+
+After switching to the `Agent365ManualExporter`:
+
+1. **HTTP 200–299** → spans accepted. They should appear in the Agent 365 portal within a few minutes.
+2. **HTTP 401/403** → token resolver returned an invalid or expired token. Check your auth implementation.
+3. **HTTP 400** → payload format is wrong. Validate your JSON against the [payload format](#payload-format) section.
+4. **HTTP 429** → rate limited. The exporter should respect `Retry-After` and retry automatically.
+5. **No response / timeout** → check network connectivity to `agent365.svc.cloud.microsoft`.
+
+### Common issues
+
+| Symptom | Cause | Fix |
+|---------|-------|-----|
+| Spans don't appear in portal | `gen_ai.operation.name` not in accepted list | Use an accepted value with exact casing, e.g. `"invoke_agent"`, `"Chat"`, or `"execute_tool"` |
+| Spans silently dropped | Missing `microsoft.tenant.id` or `gen_ai.agent.id` | Ensure both are set on every span |
+| HTTP 400 from backend | Payload structure doesn't match expected format | Verify JSON envelope matches the documented structure |
+| HTTP 401 from backend | Token resolver returns wrong/expired token | Debug your token acquisition; ensure the token's scope (audience) is the one the Agent 365 endpoint expects |
+| Only `invoke_agent` spans visible | Child spans missing required identity attrs | Set `microsoft.tenant.id` and `gen_ai.agent.id` on ALL spans, not just the root |
+| Large spans truncated | Span exceeds 250KB | Reduce `gen_ai.input.messages` / `gen_ai.output.messages` content |
+
+### Versioning note
+
+This document describes the Agent 365 backend contract as of May 2026. The payload format may evolve over time. The A365 SDK (`microsoft-agents-a365-observability-core`) handles format changes automatically and is the recommended path for production workloads that can accept the dependency.

From 292812fcf4acdedd142cbad35b876258fcd6733e Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:53:11 -0600
Subject: [PATCH 09/11] docs: cross-link to manual instrumentation guide

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/integrating-with-existing-opentelemetry.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/integrating-with-existing-opentelemetry.md b/docs/integrating-with-existing-opentelemetry.md
index 9e3fb123..9c639f0d 100644
--- a/docs/integrating-with-existing-opentelemetry.md
+++ b/docs/integrating-with-existing-opentelemetry.md
@@ -2,6 +2,8 @@
 
 This guide is for developers whose application **already** initializes OpenTelemetry — for example with `azure-monitor-opentelemetry`, an OTLP collector, or a vendor-specific exporter — and who want Agent 365 spans to flow alongside their existing telemetry. If you're starting fresh, see the [observability-core README](../libraries/microsoft-agents-a365-observability-core/README.md) for the standalone setup.
 
+> **Don't want the SDK dependency at all?** See [Manual Agent 365 span instrumentation](./manual-a365-span-instrumentation.md) for how to set the right attributes and export to the A365 backend using only `opentelemetry-sdk` + `requests`.
+
 ## The integration rule
 
 > **Initialize your existing OpenTelemetry stack first, then call Agent 365's `configure()`.** The SDK detects the existing `TracerProvider` and adds its processors to it. Your existing backend receives every span; the Agent 365 backend also receives spans when `ENABLE_A365_OBSERVABILITY_EXPORTER=true` and a `token_resolver` is provided (otherwise `configure()` falls back to `ConsoleSpanExporter`).

From c82a5c8c4e8156d4aede4405183da73aa16ab813 Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 09:57:44 -0600
Subject: [PATCH 10/11] chore: remove internal plan and spec files from repo

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 ...-05-19-manual-a365-span-instrumentation.md | 1055 -----------------
 ...manual-a365-span-instrumentation-design.md |  237 ----
 2 files changed, 1292 deletions(-)
 delete mode 100644 docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md
 delete mode 100644 docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md

diff --git a/docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md b/docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md
deleted file mode 100644
index 55d15ad3..00000000
--- a/docs/superpowers/plans/2026-05-19-manual-a365-span-instrumentation.md
+++ /dev/null
@@ -1,1055 +0,0 @@
-# Manual A365 Span Instrumentation Guide — Implementation Plan
-
-> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
-
-**Goal:** Create a comprehensive documentation guide (`docs/manual-a365-span-instrumentation.md`) that enables Python developers to manually add A365-compatible span attributes and export to the Agent 365 backend without depending on any `microsoft-agents-a365-*` package.
-
-**Architecture:** Single markdown document with tiered attribute tables, runnable Python code examples using only `opentelemetry-sdk` + `requests`, and full export protocol documentation. Each code snippet must be self-contained and copy-paste runnable.
-
-**Tech Stack:** Markdown, Python (opentelemetry-sdk, requests), Agent 365 observability backend HTTP API
-
----
-
-## File Structure
-
-| File | Responsibility |
-|------|---------------|
-| `docs/manual-a365-span-instrumentation.md` | The complete guide (single deliverable) |
-| `docs/integrating-with-existing-opentelemetry.md` | Existing guide — add a cross-link to the new doc |
-
----
-
-### Task 1: Scaffold the guide with intro and prerequisites
-
-**Files:**
-- Create: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Create the document with header, intro, and prerequisites**
-
-```markdown
-# Manual Agent 365 span instrumentation (without the SDK)
-
-This guide documents the **attribute contract** and **export protocol** for making your OpenTelemetry spans compatible with the Agent 365 observability backend — without importing any `microsoft-agents-a365-*` package.
-
-## When to use this guide
-
-Use this guide if you:
-
-- Have an existing Python application already instrumented with OpenTelemetry
-- Want your agent spans to appear in the Agent 365 portal
-- Prefer not to add the Agent 365 SDK as a dependency
-
-**When to use the SDK instead:** If you're starting fresh or can accept the dependency, the SDK (`microsoft-agents-a365-observability-core`) handles all of this automatically — attribute setting, span lifecycle, export, retries, and payload chunking. See [Integrating with existing OpenTelemetry](./integrating-with-existing-opentelemetry.md).
-
-## Prerequisites
-
-- Python 3.11+
-- `opentelemetry-sdk` (any recent version)
-- `requests` (for manual export to the A365 backend)
-- A registered Agent 365 agent (you'll need the `tenant_id` and `agent_id`)
-- A token resolver that can produce a Bearer token for the A365 ingestion endpoint
-
-Install dependencies:
-
-```bash
-pip install opentelemetry-sdk opentelemetry-api requests
-```
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: scaffold manual A365 span instrumentation guide"
-```
-
----
-
-### Task 2: Write the attribute contract tables
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add the attribute contract section with all three span type tables**
-
-Append after the Prerequisites section:
-
-```markdown
-## Attribute contract
-
-The Agent 365 backend filters spans by `gen_ai.operation.name` and routes them by `microsoft.tenant.id` + `gen_ai.agent.id`. Spans missing required attributes are silently dropped.
-
-### Accepted `gen_ai.operation.name` values
-
-Only spans with one of these values pass the backend's ingest filter:
-
-| Value | Span type |
-|-------|-----------|
-| `invoke_agent` | Top-level agent invocation |
-| `Chat` | Inference (manual instrumentation convention) |
-| `chat` | Inference (OTel GenAI semconv / auto-instrumentation) |
-| `TextCompletion` | Inference (text completion) |
-| `GenerateContent` | Inference (content generation) |
-| `execute_tool` | Tool execution |
-
-### `invoke_agent` span
-
-The top-level span representing one user turn / agent invocation.
-
-| Tier | Attribute | Expected value | Notes |
-|------|-----------|----------------|-------|
-| **Required** | `gen_ai.operation.name` | `"invoke_agent"` | Must match exactly |
-| **Required** | `microsoft.tenant.id` | Tenant GUID | Used for routing |
-| **Required** | `gen_ai.agent.id` | Agent GUID | Used for routing |
-| Recommended | `gen_ai.agent.name` | Human-readable agent name | Displayed in portal |
-| Recommended | `microsoft.session.id` | Session identifier | Groups turns in portal |
-| Recommended | `gen_ai.conversation.id` | Conversation identifier | Thread grouping |
-| Recommended | `microsoft.a365.agent.blueprint.id` | Blueprint GUID | Links to agent definition |
-| Recommended | `microsoft.a365.agent.platform.id` | Platform identifier | Identifies hosting platform |
-| Recommended | `user.id` | End-user identifier | Portal user analytics |
-| Recommended | `server.address` | Server hostname | |
-| Optional | `gen_ai.agent.description` | Agent description | |
-| Optional | `gen_ai.agent.version` | Agent version string | |
-| Optional | `microsoft.agent.user.id` | Agent's service identity | |
-| Optional | `microsoft.agent.user.email` | Agent's service email | |
-| Optional | `user.email` | End-user email | |
-| Optional | `user.name` | End-user display name | |
-| Optional | `client.address` | Client IP or hostname | |
-| Optional | `microsoft.channel.name` | Channel (e.g. `"Teams"`, `"Webchat"`) | |
-| Optional | `microsoft.channel.link` | Channel URL | |
-| Optional | `gen_ai.input.messages` | JSON-serialized input messages | Can be large; may be truncated |
-| Optional | `microsoft.a365.caller.agent.name` | Calling agent name | For agent-to-agent calls |
-| Optional | `microsoft.a365.caller.agent.id` | Calling agent GUID | For agent-to-agent calls |
-| Optional | `microsoft.a365.caller.agent.blueprint.id` | Calling agent blueprint | For agent-to-agent calls |
-
-### `inference` span (LLM call)
-
-Child of `invoke_agent`. One per LLM inference call.
-
-| Tier | Attribute | Expected value | Notes |
-|------|-----------|----------------|-------|
-| **Required** | `gen_ai.operation.name` | `"Chat"` or `"TextCompletion"` or `"GenerateContent"` | See accepted values above |
-| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
-| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
-| **Required** | `gen_ai.request.model` | Model name (e.g. `"gpt-4o"`) | |
-| Recommended | `gen_ai.usage.input_tokens` | Integer | Token billing/monitoring |
-| Recommended | `gen_ai.usage.output_tokens` | Integer | Token billing/monitoring |
-| Recommended | `gen_ai.response.finish_reasons` | JSON array (e.g. `["stop"]`) | |
-| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
-| Recommended | `gen_ai.provider.name` | `"openai"`, `"azure"`, etc. | |
-| Optional | `gen_ai.input.messages` | JSON-serialized input messages | |
-| Optional | `gen_ai.output.messages` | JSON-serialized output messages | |
-| Optional | `server.address` | LLM endpoint hostname | |
-| Optional | `server.port` | LLM endpoint port | Omit if 443 |
-| Optional | `microsoft.a365.agent.thought.process` | Agent reasoning trace | |
-
-### `execute_tool` span
-
-Child of `invoke_agent`. One per tool invocation.
-
-| Tier | Attribute | Expected value | Notes |
-|------|-----------|----------------|-------|
-| **Required** | `gen_ai.operation.name` | `"execute_tool"` | Must match exactly |
-| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
-| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
-| **Required** | `gen_ai.tool.name` | Tool function name | |
-| Recommended | `gen_ai.tool.call.id` | Tool call ID from LLM response | |
-| Recommended | `gen_ai.tool.call.arguments` | JSON-serialized arguments | |
-| Recommended | `gen_ai.tool.call.result` | JSON-serialized result | Set after execution |
-| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
-| Optional | `gen_ai.tool.type` | `"function"` | |
-| Optional | `gen_ai.tool.description` | Tool description | |
-| Optional | `server.address` | Server hostname | |
-
-### Resource attributes
-
-Set these on your `TracerProvider`'s `Resource`:
-
-| Tier | Attribute | Expected value |
-|------|-----------|----------------|
-| **Required** | `service.name` | Your service/agent name |
-| Recommended | `service.namespace` | Your service namespace |
-
-### SDK-identifying attributes (optional)
-
-Set these on every span to identify your telemetry source:
-
-| Attribute | Value |
-|-----------|-------|
-| `telemetry.sdk.name` | `"A365ObservabilitySDK"` (or your own identifier) |
-| `telemetry.sdk.language` | `"python"` |
-| `telemetry.sdk.version` | Your version string |
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add attribute contract tables to manual instrumentation guide"
-```
-
----
-
-### Task 3: Write Example 1 — minimal invoke_agent span
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add the examples section header and Example 1**
-
-Append after the attribute contract section:
-
-```markdown
-## Examples
-
-### Example 1: Minimal `invoke_agent` span
-
-Creates a single root span with only the required attributes and exports to console for verification.
-
-```python
-import json
-import uuid
-
-from opentelemetry import trace
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
-
-# --- Configuration (replace with your values) ---
-TENANT_ID = "your-tenant-guid"
-AGENT_ID = "your-agent-guid"
-AGENT_NAME = "my-weather-agent"
-
-# --- Set up OpenTelemetry with console export ---
-resource = Resource.create({"service.name": AGENT_NAME})
-provider = TracerProvider(resource=resource)
-provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
-trace.set_tracer_provider(provider)
-
-tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
-
-# --- Create an invoke_agent span ---
-with tracer.start_as_current_span(
-    name=f"invoke_agent {AGENT_NAME}",
-    kind=trace.SpanKind.INTERNAL,
-) as span:
-    # Required attributes
-    span.set_attribute("gen_ai.operation.name", "invoke_agent")
-    span.set_attribute("microsoft.tenant.id", TENANT_ID)
-    span.set_attribute("gen_ai.agent.id", AGENT_ID)
-
-    # Recommended attributes
-    span.set_attribute("gen_ai.agent.name", AGENT_NAME)
-    span.set_attribute("microsoft.session.id", str(uuid.uuid4()))
-    span.set_attribute("gen_ai.conversation.id", str(uuid.uuid4()))
-
-    # ... your agent logic here ...
-    print("Agent invoked successfully")
-
-# Flush to ensure spans are exported
-provider.force_flush()
-```
-
-Run this and you should see a JSON span dump on stdout with `gen_ai.operation.name: invoke_agent`.
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add Example 1 (minimal invoke_agent span)"
-```
-
----
-
-### Task 4: Write Example 2 — full agent turn with span hierarchy
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add Example 2 with all three span types**
-
-Append after Example 1:
-
-```markdown
-### Example 2: Full agent turn with span hierarchy
-
-Creates the proper parent-child relationship: `invoke_agent` → `inference` + `execute_tool`.
-
-```python
-import json
-import uuid
-
-from opentelemetry import trace
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
-
-# --- Configuration ---
-TENANT_ID = "your-tenant-guid"
-AGENT_ID = "your-agent-guid"
-AGENT_NAME = "my-weather-agent"
-MODEL_NAME = "gpt-4o"
-PROVIDER_NAME = "azure"
-
-# --- OpenTelemetry setup ---
-resource = Resource.create({
-    "service.name": AGENT_NAME,
-    "service.namespace": "my-namespace",
-})
-provider = TracerProvider(resource=resource)
-provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
-trace.set_tracer_provider(provider)
-
-tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
-
-# --- Simulate an agent turn ---
-session_id = str(uuid.uuid4())
-conversation_id = str(uuid.uuid4())
-user_message = "What's the weather in Seattle?"
-
-
-def get_weather(city: str) -> str:
-    """Simulated tool."""
-    return json.dumps({"city": city, "temp_f": 62, "condition": "cloudy"})
-
-
-# Top-level: invoke_agent
-with tracer.start_as_current_span(
-    name=f"invoke_agent {AGENT_NAME}",
-    kind=trace.SpanKind.INTERNAL,
-) as agent_span:
-    agent_span.set_attribute("gen_ai.operation.name", "invoke_agent")
-    agent_span.set_attribute("microsoft.tenant.id", TENANT_ID)
-    agent_span.set_attribute("gen_ai.agent.id", AGENT_ID)
-    agent_span.set_attribute("gen_ai.agent.name", AGENT_NAME)
-    agent_span.set_attribute("microsoft.session.id", session_id)
-    agent_span.set_attribute("gen_ai.conversation.id", conversation_id)
-    agent_span.set_attribute("user.id", "user-123")
-    agent_span.set_attribute("gen_ai.input.messages", json.dumps([
-        {"role": "user", "content": user_message}
-    ]))
-
-    # Child: inference (LLM call)
-    with tracer.start_as_current_span(
-        name=f"Chat {MODEL_NAME}",
-        kind=trace.SpanKind.INTERNAL,
-    ) as inference_span:
-        inference_span.set_attribute("gen_ai.operation.name", "Chat")
-        inference_span.set_attribute("microsoft.tenant.id", TENANT_ID)
-        inference_span.set_attribute("gen_ai.agent.id", AGENT_ID)
-        inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
-        inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
-        inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
-        inference_span.set_attribute("server.address", "my-resource.openai.azure.com")
-
-        # ... call your LLM here ...
-        # After response:
-        inference_span.set_attribute("gen_ai.usage.input_tokens", 42)
-        inference_span.set_attribute("gen_ai.usage.output_tokens", 15)
-        inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["tool_calls"]))
-
-    # Child: execute_tool
-    tool_call_id = "call_abc123"
-    tool_name = "get_weather"
-    tool_args = json.dumps({"city": "Seattle"})
-
-    with tracer.start_as_current_span(
-        name=f"execute_tool {tool_name}",
-        kind=trace.SpanKind.INTERNAL,
-    ) as tool_span:
-        tool_span.set_attribute("gen_ai.operation.name", "execute_tool")
-        tool_span.set_attribute("microsoft.tenant.id", TENANT_ID)
-        tool_span.set_attribute("gen_ai.agent.id", AGENT_ID)
-        tool_span.set_attribute("gen_ai.tool.name", tool_name)
-        tool_span.set_attribute("gen_ai.tool.call.id", tool_call_id)
-        tool_span.set_attribute("gen_ai.tool.call.arguments", tool_args)
-        tool_span.set_attribute("gen_ai.conversation.id", conversation_id)
-        tool_span.set_attribute("gen_ai.tool.type", "function")
-
-        # Execute the tool
-        result = get_weather("Seattle")
-        tool_span.set_attribute("gen_ai.tool.call.result", result)
-
-provider.force_flush()
-```
-
-You should see three spans in the console output: `invoke_agent my-weather-agent` (root), `Chat gpt-4o` (child), and `execute_tool get_weather` (child). Verify that `parentSpanId` on the children matches the root's `spanId`.
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add Example 2 (full agent turn with span hierarchy)"
-```
-
----
-
-### Task 5: Write the export protocol section
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add the export protocol documentation**
-
-Append after Example 2:
-
-```markdown
-## Exporting to the Agent 365 backend
-
-The Agent 365 backend does **not** accept standard OTLP protobuf or OTLP/HTTP JSON. It uses a custom OTLP-like JSON format. This section documents the HTTP contract.
-
-### Endpoint
-
-```
-POST https://agent365.svc.cloud.microsoft/observability/tenants/{tenantId}/otlp/agents/{agentId}/traces?api-version=1
-```
-
-Replace `{tenantId}` and `{agentId}` with the values from your span attributes (`microsoft.tenant.id` and `gen_ai.agent.id`).
-
-### Authentication
-
-Every request requires a Bearer token:
-
-```
-Authorization: Bearer <token>
-Content-Type: application/json
-```
-
-The token is obtained from a **token resolver** — a function with signature:
-
-```python
-def resolve_token(agent_id: str, tenant_id: str) -> str:
-    """Return a valid Bearer token for the given agent and tenant."""
-    ...
-```
-
-How you implement this depends on your environment (MSAL client credentials, managed identity, etc.). The A365 SDK uses this same interface internally.
-
-### Payload format
-
-The body is JSON with this structure:
-
-```json
-{
-  "resourceSpans": [
-    {
-      "resource": {
-        "attributes": {
-          "service.name": "my-agent",
-          "service.namespace": "my-namespace"
-        }
-      },
-      "scopeSpans": [
-        {
-          "scope": {
-            "name": "my-agent-instrumentation",
-            "version": "1.0.0"
-          },
-          "spans": [
-            {
-              "traceId": "0af7651916cd43dd8448eb211c80319c",
-              "spanId": "b7ad6b7169203331",
-              "parentSpanId": null,
-              "name": "invoke_agent my-agent",
-              "kind": "INTERNAL",
-              "startTimeUnixNano": 1716000000000000000,
-              "endTimeUnixNano": 1716000001000000000,
-              "attributes": {
-                "gen_ai.operation.name": "invoke_agent",
-                "microsoft.tenant.id": "tenant-guid",
-                "gen_ai.agent.id": "agent-guid"
-              },
-              "events": null,
-              "links": null,
-              "status": {
-                "code": "OK",
-                "message": ""
-              }
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
-```
-
-### Field reference
-
-| Field | Type | Description |
-|-------|------|-------------|
-| `traceId` | string | 32 hex chars (128-bit trace ID) |
-| `spanId` | string | 16 hex chars (64-bit span ID) |
-| `parentSpanId` | string \| null | Parent's spanId, or null for root |
-| `name` | string | Span name (see naming conventions below) |
-| `kind` | string | Span kind name: `"INTERNAL"`, `"CLIENT"`, `"SERVER"`, etc. |
-| `startTimeUnixNano` | integer | Start time in nanoseconds since Unix epoch |
-| `endTimeUnixNano` | integer | End time in nanoseconds since Unix epoch |
-| `attributes` | object \| null | Key-value map of span attributes |
-| `events` | array \| null | Span events (exceptions, logs) |
-| `links` | array \| null | Span links |
-| `status.code` | string | `"UNSET"`, `"OK"`, or `"ERROR"` |
-| `status.message` | string | Error description (empty for non-error) |
-
-### Span name conventions
-
-| Span type | Name format | Example |
-|-----------|-------------|---------|
-| invoke_agent | `"invoke_agent"` or `"invoke_agent <agent_name>"` | `"invoke_agent my-weather-agent"` |
-| inference | `"<operation> <model>"` | `"Chat gpt-4o"` |
-| execute_tool | `"execute_tool <tool_name>"` | `"execute_tool get_weather"` |
-
-### Constraints
-
-| Constraint | Value | Behavior |
-|------------|-------|----------|
-| Max payload size | ~900,000 bytes | Split spans across multiple POST requests |
-| Max individual span | 250,000 bytes | Largest attributes are replaced with `"TRUNCATED"` |
-| Retry on | 408, 429, 5xx | Exponential backoff; respect `Retry-After` header for 429 |
-| Fail on | Other 4xx | Non-retryable; check auth and payload format |
-| Timeout | 30 seconds | Per-request HTTP timeout |
-
-### Grouping requirement
-
-All spans in a single POST must share the same `microsoft.tenant.id` and `gen_ai.agent.id`. If your batch contains spans for multiple tenants or agents, partition them into separate requests.
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add export protocol section to manual instrumentation guide"
-```
-
----
-
-### Task 6: Write Example 3 — DIY exporter
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add Example 3 with a custom SpanExporter implementation**
-
-Append after the export protocol section:
-
-```markdown
-### Example 3: Custom exporter for the Agent 365 backend
-
-A minimal `SpanExporter` that builds the JSON envelope and POSTs to the A365 endpoint. This replaces the SDK's `_Agent365Exporter` without any A365 dependency.
-
-```python
-import json
-import logging
-import time
-from collections.abc import Sequence
-
-import requests
-from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
-from opentelemetry.trace import StatusCode
-
-logger = logging.getLogger(__name__)
-
-# Accepted operation names — spans with other values are filtered out
-ACCEPTED_OPERATIONS = frozenset({
-    "invoke_agent", "execute_tool", "chat", "Chat", "TextCompletion", "GenerateContent",
-})
-
-A365_ENDPOINT = "https://agent365.svc.cloud.microsoft"
-MAX_RETRIES = 3
-HTTP_TIMEOUT = 30.0
-
-
-class Agent365ManualExporter(SpanExporter):
-    """Minimal exporter that POSTs spans to the Agent 365 backend."""
-
-    def __init__(self, token_resolver):
-        """
-        Args:
-            token_resolver: Callable(agent_id, tenant_id) -> bearer_token string.
-        """
-        self._token_resolver = token_resolver
-        self._session = requests.Session()
-
-    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
-        # Partition by (tenant_id, agent_id)
-        groups = self._partition(spans)
-        if not groups:
-            return SpanExportResult.SUCCESS
-
-        any_failure = False
-        for (tenant_id, agent_id), group_spans in groups.items():
-            url = (
-                f"{A365_ENDPOINT}/observability/tenants/{tenant_id}"
-                f"/otlp/agents/{agent_id}/traces?api-version=1"
-            )
-            payload = self._build_payload(group_spans)
-            body = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
-
-            # Resolve auth token
-            try:
-                token = self._token_resolver(agent_id, tenant_id)
-            except Exception as e:
-                logger.error(f"Token resolution failed: {e}")
-                any_failure = True
-                continue
-
-            headers = {
-                "content-type": "application/json",
-                "authorization": f"Bearer {token}",
-            }
-
-            if not self._post_with_retries(url, body, headers):
-                any_failure = True
-
-        return SpanExportResult.FAILURE if any_failure else SpanExportResult.SUCCESS
-
-    def shutdown(self):
-        self._session.close()
-
-    def _partition(
-        self, spans: Sequence[ReadableSpan]
-    ) -> dict[tuple[str, str], list[ReadableSpan]]:
-        """Filter eligible spans and group by (tenant_id, agent_id)."""
-        groups: dict[tuple[str, str], list[ReadableSpan]] = {}
-        for sp in spans:
-            attrs = sp.attributes or {}
-            op_name = str(attrs.get("gen_ai.operation.name", ""))
-            if op_name not in ACCEPTED_OPERATIONS:
-                continue
-            tenant = str(attrs.get("microsoft.tenant.id", ""))
-            agent = str(attrs.get("gen_ai.agent.id", ""))
-            if not tenant or not agent:
-                continue
-            groups.setdefault((tenant, agent), []).append(sp)
-        return groups
-
-    def _build_payload(self, spans: Sequence[ReadableSpan]) -> dict:
-        """Build the OTLP-like JSON envelope."""
-        # Get resource attributes from the first span
-        resource_attrs = {}
-        if spans and spans[0].resource:
-            resource_attrs = dict(spans[0].resource.attributes)
-
-        # Group spans by instrumentation scope
-        scope_map: dict[tuple[str, str | None], list[dict]] = {}
-        for sp in spans:
-            scope = sp.instrumentation_scope
-            scope_name = scope.name if scope else "unknown"
-            scope_version = scope.version if scope else None
-            scope_map.setdefault((scope_name, scope_version), []).append(
-                self._map_span(sp)
-            )
-
-        scope_spans = [
-            {"scope": {"name": name, "version": version}, "spans": mapped}
-            for (name, version), mapped in scope_map.items()
-        ]
-
-        return {
-            "resourceSpans": [
-                {
-                    "resource": {"attributes": resource_attrs or None},
-                    "scopeSpans": scope_spans,
-                }
-            ]
-        }
-
-    @staticmethod
-    def _map_span(sp: ReadableSpan) -> dict:
-        """Convert a ReadableSpan to the A365 JSON format."""
-        ctx = sp.context
-        trace_id = f"{ctx.trace_id:032x}"
-        span_id = f"{ctx.span_id:016x}"
-        parent_span_id = None
-        if sp.parent and sp.parent.span_id:
-            parent_span_id = f"{sp.parent.span_id:016x}"
-
-        attrs = dict(sp.attributes or {})
-
-        # Map events
-        events = None
-        if sp.events:
-            events = [
-                {
-                    "timeUnixNano": ev.timestamp,
-                    "name": ev.name,
-                    "attributes": dict(ev.attributes) if ev.attributes else None,
-                }
-                for ev in sp.events
-            ]
-
-        # Map status
-        status_code = sp.status.status_code if sp.status else StatusCode.UNSET
-        status = {
-            "code": status_code.name,
-            "message": getattr(sp.status, "description", "") or "",
-        }
-
-        return {
-            "traceId": trace_id,
-            "spanId": span_id,
-            "parentSpanId": parent_span_id,
-            "name": sp.name,
-            "kind": sp.kind.name,
-            "startTimeUnixNano": sp.start_time,
-            "endTimeUnixNano": sp.end_time,
-            "attributes": attrs or None,
-            "events": events,
-            "links": None,
-            "status": status,
-        }
-
-    def _post_with_retries(self, url: str, body: str, headers: dict) -> bool:
-        """POST with exponential backoff on transient errors."""
-        for attempt in range(MAX_RETRIES + 1):
-            try:
-                resp = self._session.post(
-                    url, data=body, headers=headers, timeout=HTTP_TIMEOUT
-                )
-                if 200 <= resp.status_code < 300:
-                    return True
-                if resp.status_code in (408, 429) or resp.status_code >= 500:
-                    if attempt < MAX_RETRIES:
-                        # Respect Retry-After for 429
-                        retry_after = resp.headers.get("Retry-After")
-                        if retry_after and retry_after.isdigit():
-                            time.sleep(min(float(retry_after), 60.0))
-                        else:
-                            time.sleep(0.5 * (2 ** attempt))
-                        continue
-                logger.error(f"HTTP {resp.status_code}: {resp.text[:200]}")
-                return False
-            except requests.RequestException as e:
-                if attempt < MAX_RETRIES:
-                    time.sleep(0.5 * (2 ** attempt))
-                    continue
-                logger.error(f"Request failed after {MAX_RETRIES + 1} attempts: {e}")
-                return False
-        return False
-```
-
-**Usage:**
-
-```python
-from opentelemetry.sdk.trace.export import BatchSpanProcessor
-
-def my_token_resolver(agent_id: str, tenant_id: str) -> str:
-    # Your token acquisition logic here (MSAL, managed identity, etc.)
-    return "your-bearer-token"
-
-exporter = Agent365ManualExporter(token_resolver=my_token_resolver)
-provider.add_span_processor(BatchSpanProcessor(exporter))
-```
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add Example 3 (DIY Agent365 exporter)"
-```
-
----
-
-### Task 7: Write the end-to-end example
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add the end-to-end example combining all pieces**
-
-Append after Example 3:
-
-```markdown
-### Example 4: End-to-end agent loop with A365 export
-
-Combines everything: proper span hierarchy, all recommended attributes, and export to the Agent 365 backend.
-
-```python
-"""
-Complete example: manually instrumented agent with A365 export.
-
-Requirements:
-    pip install opentelemetry-sdk opentelemetry-api requests openai
-
-Replace the placeholder values with your actual tenant ID, agent ID,
-and token resolver implementation.
-"""
-
-import json
-import uuid
-
-from opentelemetry import trace
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
-
-# --- Configuration ---
-TENANT_ID = "your-tenant-guid"
-AGENT_ID = "your-agent-guid"
-AGENT_NAME = "my-weather-agent"
-SERVICE_NAMESPACE = "my-namespace"
-MODEL_NAME = "gpt-4o"
-PROVIDER_NAME = "azure"
-SERVER_ADDRESS = "my-resource.openai.azure.com"
-
-
-def my_token_resolver(agent_id: str, tenant_id: str) -> str:
-    """Replace with your actual token acquisition logic."""
-    raise NotImplementedError("Implement your token resolver")
-
-
-# --- OpenTelemetry setup ---
-resource = Resource.create({
-    "service.name": AGENT_NAME,
-    "service.namespace": SERVICE_NAMESPACE,
-})
-provider = TracerProvider(resource=resource)
-
-# For development: console export to verify spans locally
-provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
-
-# For production: uncomment to export to Agent 365 backend
-# from agent365_exporter import Agent365ManualExporter  # Example 3 above
-# provider.add_span_processor(BatchSpanProcessor(
-#     Agent365ManualExporter(token_resolver=my_token_resolver)
-# ))
-
-trace.set_tracer_provider(provider)
-tracer = trace.get_tracer("my-agent-instrumentation", "1.0.0")
-
-# --- Common attributes helper ---
-COMMON_ATTRS = {
-    "microsoft.tenant.id": TENANT_ID,
-    "gen_ai.agent.id": AGENT_ID,
-    "gen_ai.agent.name": AGENT_NAME,
-    "telemetry.sdk.name": "A365ObservabilitySDK",
-    "telemetry.sdk.language": "python",
-    "telemetry.sdk.version": "1.0.0",
-}
-
-
-def set_common_attrs(span):
-    for key, value in COMMON_ATTRS.items():
-        span.set_attribute(key, value)
-
-
-# --- Simulated tools ---
-def get_weather(city: str) -> str:
-    return json.dumps({"city": city, "temp_f": 62, "condition": "cloudy"})
-
-
-# --- Agent turn ---
-def handle_user_turn(user_message: str, user_id: str):
-    session_id = str(uuid.uuid4())
-    conversation_id = str(uuid.uuid4())
-
-    with tracer.start_as_current_span(
-        name=f"invoke_agent {AGENT_NAME}",
-        kind=trace.SpanKind.INTERNAL,
-    ) as agent_span:
-        set_common_attrs(agent_span)
-        agent_span.set_attribute("gen_ai.operation.name", "invoke_agent")
-        agent_span.set_attribute("microsoft.session.id", session_id)
-        agent_span.set_attribute("gen_ai.conversation.id", conversation_id)
-        agent_span.set_attribute("user.id", user_id)
-        agent_span.set_attribute("gen_ai.input.messages", json.dumps([
-            {"role": "user", "content": user_message}
-        ]))
-
-        # Step 1: Call the LLM
-        with tracer.start_as_current_span(
-            name=f"Chat {MODEL_NAME}",
-            kind=trace.SpanKind.INTERNAL,
-        ) as inference_span:
-            set_common_attrs(inference_span)
-            inference_span.set_attribute("gen_ai.operation.name", "Chat")
-            inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
-            inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
-            inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
-            inference_span.set_attribute("server.address", SERVER_ADDRESS)
-
-            # ... your LLM call here ...
-            # Simulate response with tool call
-            inference_span.set_attribute("gen_ai.usage.input_tokens", 55)
-            inference_span.set_attribute("gen_ai.usage.output_tokens", 22)
-            inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["tool_calls"]))
-
-        # Step 2: Execute the tool
-        tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
-        tool_name = "get_weather"
-        tool_args = json.dumps({"city": "Seattle"})
-
-        with tracer.start_as_current_span(
-            name=f"execute_tool {tool_name}",
-            kind=trace.SpanKind.INTERNAL,
-        ) as tool_span:
-            set_common_attrs(tool_span)
-            tool_span.set_attribute("gen_ai.operation.name", "execute_tool")
-            tool_span.set_attribute("gen_ai.tool.name", tool_name)
-            tool_span.set_attribute("gen_ai.tool.call.id", tool_call_id)
-            tool_span.set_attribute("gen_ai.tool.call.arguments", tool_args)
-            tool_span.set_attribute("gen_ai.conversation.id", conversation_id)
-            tool_span.set_attribute("gen_ai.tool.type", "function")
-
-            result = get_weather("Seattle")
-            tool_span.set_attribute("gen_ai.tool.call.result", result)
-
-        # Step 3: Final LLM call with tool result
-        with tracer.start_as_current_span(
-            name=f"Chat {MODEL_NAME}",
-            kind=trace.SpanKind.INTERNAL,
-        ) as final_inference_span:
-            set_common_attrs(final_inference_span)
-            final_inference_span.set_attribute("gen_ai.operation.name", "Chat")
-            final_inference_span.set_attribute("gen_ai.request.model", MODEL_NAME)
-            final_inference_span.set_attribute("gen_ai.provider.name", PROVIDER_NAME)
-            final_inference_span.set_attribute("gen_ai.conversation.id", conversation_id)
-            final_inference_span.set_attribute("server.address", SERVER_ADDRESS)
-
-            # ... your LLM call with tool result here ...
-            final_inference_span.set_attribute("gen_ai.usage.input_tokens", 85)
-            final_inference_span.set_attribute("gen_ai.usage.output_tokens", 45)
-            final_inference_span.set_attribute("gen_ai.response.finish_reasons", json.dumps(["stop"]))
-
-
-# --- Run ---
-if __name__ == "__main__":
-    handle_user_turn("What's the weather in Seattle?", user_id="user-456")
-    provider.force_flush()
-    print("Done — check console output for spans")
-```
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add Example 4 (end-to-end agent loop)"
-```
-
----
-
-### Task 8: Write validation and troubleshooting section
-
-**Files:**
-- Modify: `docs/manual-a365-span-instrumentation.md`
-
-- [ ] **Step 1: Add validation and troubleshooting**
-
-Append at the end of the document:
-
-```markdown
-## Validation and troubleshooting
-
-### Verifying locally
-
-1. Use `ConsoleSpanExporter` (shown in the examples above) to dump spans to stdout
-2. Check that each span has:
-   - A `gen_ai.operation.name` from the [accepted values list](#accepted-gen_aioperation_name-values)
-   - Both `microsoft.tenant.id` and `gen_ai.agent.id` set to non-empty strings
-   - Correct parent-child relationships (`parentSpanId` on children matches root's `spanId`)
-
-### Verifying against the backend
-
-After switching to the `Agent365ManualExporter`:
-
-1. **HTTP 200–299** → spans accepted. They should appear in the Agent 365 portal within a few minutes.
-2. **HTTP 401/403** → token resolver returned an invalid or expired token. Check your auth implementation.
-3. **HTTP 400** → payload format is wrong. Validate your JSON against the [payload format](#payload-format) section.
-4. **HTTP 429** → rate limited. The exporter should respect `Retry-After` and retry automatically.
-5. **No response / timeout** → check network connectivity to `agent365.svc.cloud.microsoft`.
-
-### Common issues
-
-| Symptom | Cause | Fix |
-|---------|-------|-----|
-| Spans don't appear in portal | `gen_ai.operation.name` not in accepted list | Use exactly `"invoke_agent"`, `"Chat"`, or `"execute_tool"` |
-| Spans silently dropped | Missing `microsoft.tenant.id` or `gen_ai.agent.id` | Ensure both are set on every span |
-| HTTP 400 from backend | Payload structure doesn't match expected format | Verify JSON envelope matches the documented structure |
-| HTTP 401 from backend | Token resolver returns wrong/expired token | Debug your token acquisition; ensure scope matches |
-| Only `invoke_agent` spans visible | Child spans missing required identity attrs | Set `microsoft.tenant.id` and `gen_ai.agent.id` on ALL spans, not just the root |
-| Large spans truncated | Span exceeds 250KB | Reduce `gen_ai.input.messages` / `gen_ai.output.messages` content |
-
-### Versioning note
-
-This document describes the Agent 365 backend contract as of May 2026. The payload format may evolve over time. The A365 SDK (`microsoft-agents-a365-observability-core`) handles format changes automatically and is the recommended path for production workloads that can accept the dependency.
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/manual-a365-span-instrumentation.md
-git commit -m "docs: add validation and troubleshooting section"
-```
-
----
-
-### Task 9: Add cross-link from existing integration guide
-
-**Files:**
-- Modify: `docs/integrating-with-existing-opentelemetry.md` (add a callout near the top)
-
-- [ ] **Step 1: Add a cross-reference after the first paragraph**
-
-After line 3 (the intro paragraph ending with "...for the standalone setup."), add:
-
-```markdown
-
-> **Don't want the SDK dependency at all?** See [Manual Agent 365 span instrumentation](./manual-a365-span-instrumentation.md) for how to set the right attributes and export to the A365 backend using only `opentelemetry-sdk` + `requests`.
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add docs/integrating-with-existing-opentelemetry.md
-git commit -m "docs: cross-link to manual instrumentation guide"
-```
-
----
-
-### Task 10: Final review and push
-
-**Files:**
-- Review: `docs/manual-a365-span-instrumentation.md` (full read-through)
-
-- [ ] **Step 1: Review the complete document for consistency**
-
-Read through `docs/manual-a365-span-instrumentation.md` end-to-end and verify:
-- All attribute keys in examples match the attribute tables exactly
-- All code snippets use consistent variable names (`TENANT_ID`, `AGENT_ID`, etc.)
-- No broken internal markdown links
-- Payload format example matches the exporter code structure
-
-- [ ] **Step 2: Run a quick markdown lint (if available)**
-
-```bash
-# Optional: check for markdown issues
-cat docs/manual-a365-span-instrumentation.md | head -5
-```
-
-- [ ] **Step 3: Push the branch**
-
-```bash
-git push origin docs/manual-a365-span-instrumentation
-```
-
-- [ ] **Step 4: Create PR**
-
-```bash
-gh pr create --title "docs: add manual A365 span instrumentation guide (without SDK)" \
-  --body "Adds documentation for teams that want A365 portal compatibility without the SDK dependency.
-
-Covers:
-- Tiered attribute contract (required/recommended/optional) for all three span types
-- Export protocol (endpoint, auth, payload format, constraints)
-- Complete runnable Python examples using only opentelemetry-sdk + requests
-- Custom SpanExporter implementation for the A365 backend
-- Validation and troubleshooting guide
-
-Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" \
-  --base main
-```
diff --git a/docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md b/docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md
deleted file mode 100644
index a621cb2a..00000000
--- a/docs/superpowers/specs/2026-05-19-manual-a365-span-instrumentation-design.md
+++ /dev/null
@@ -1,237 +0,0 @@
-# Design: Manual Agent 365 Span Instrumentation (without the SDK)
-
-**Date:** 2026-05-19
-**Status:** Approved
-**Branch:** `docs/manual-a365-span-instrumentation`
-
-## Problem Statement
-
-Teams with existing OpenTelemetry-instrumented Python applications want their spans to appear in the Agent 365 portal without taking a dependency on any `microsoft-agents-a365-*` package. They need a documented attribute contract and export protocol so they can manually set the right span attributes and POST to the A365 ingestion endpoint using only `opentelemetry-sdk` and `requests`.
-
-## Audience
-
-Python developers who:
-- Already have OpenTelemetry configured (any exporter)
-- Want A365 portal compatibility without importing the A365 SDK
-- Need to understand the exact attribute contract the backend expects
-
-## Deliverable
-
-A single documentation file: `docs/manual-a365-span-instrumentation.md`
-
-## Document Structure
-
-| Section | Content |
-|---------|---------|
-| When to use this guide | Audience, prerequisites, when to use the SDK instead |
-| Attribute contract | 3 tiered tables (required/recommended/optional) per span type |
-| Resource attributes | What to set on the TracerProvider resource |
-| SDK-identifying attributes | Optional telemetry.sdk.* attrs for identification |
-| Complete examples | 3 runnable Python snippets (minimal → full → export) |
-| Exporting to Agent 365 | Endpoint URL, auth, payload format, size limits, retry |
-| End-to-end example | Full agent loop with proper span hierarchy + export |
-| Validation & troubleshooting | Verify spans arrive; common rejection reasons |
-
-## Attribute Contract
-
-### Span Type: `invoke_agent`
-
-The top-level span representing one user turn / agent invocation.
-
-| Tier | Attribute | Expected Value |
-|------|-----------|----------------|
-| **Required** | `gen_ai.operation.name` | `"invoke_agent"` |
-| **Required** | `microsoft.tenant.id` | Tenant GUID |
-| **Required** | `gen_ai.agent.id` | Agent GUID |
-| Recommended | `gen_ai.agent.name` | Human-readable agent name |
-| Recommended | `microsoft.session.id` | Session identifier |
-| Recommended | `gen_ai.conversation.id` | Conversation identifier |
-| Recommended | `microsoft.a365.agent.blueprint.id` | Blueprint identifier |
-| Recommended | `microsoft.a365.agent.platform.id` | Platform identifier |
-| Recommended | `user.id` | End-user identifier |
-| Recommended | `server.address` | Server hostname |
-| Optional | `gen_ai.agent.description` | Agent description |
-| Optional | `gen_ai.agent.version` | Agent version string |
-| Optional | `microsoft.agent.user.id` | Agent's user identity |
-| Optional | `microsoft.agent.user.email` | Agent's user email |
-| Optional | `user.email` | End-user email |
-| Optional | `user.name` | End-user display name |
-| Optional | `client.address` | Client IP/hostname |
-| Optional | `microsoft.channel.name` | Channel name (Teams, Webchat, etc.) |
-| Optional | `microsoft.channel.link` | Channel link/URL |
-| Optional | `gen_ai.input.messages` | JSON-serialized input messages |
-| Optional | `microsoft.a365.caller.agent.name` | Calling agent name (agent-to-agent) |
-| Optional | `microsoft.a365.caller.agent.id` | Calling agent ID (agent-to-agent) |
-
-### Span Type: `inference` (LLM call)
-
-Child of `invoke_agent`. One per LLM inference call.
-
-| Tier | Attribute | Expected Value |
-|------|-----------|----------------|
-| **Required** | `gen_ai.operation.name` | `"Chat"` (or `"TextCompletion"` / `"GenerateContent"`) |
-| **Required** | `microsoft.tenant.id` | Tenant GUID |
-| **Required** | `gen_ai.agent.id` | Agent GUID |
-| **Required** | `gen_ai.request.model` | Model name (e.g. `"gpt-4o"`) |
-| Recommended | `gen_ai.usage.input_tokens` | Integer token count |
-| Recommended | `gen_ai.usage.output_tokens` | Integer token count |
-| Recommended | `gen_ai.response.finish_reasons` | JSON array of finish reasons |
-| Recommended | `gen_ai.conversation.id` | Conversation identifier |
-| Recommended | `gen_ai.provider.name` | Provider (e.g. `"openai"`, `"azure"`) |
-| Optional | `gen_ai.input.messages` | JSON-serialized input messages |
-| Optional | `gen_ai.output.messages` | JSON-serialized output messages |
-| Optional | `server.address` | LLM endpoint hostname |
-| Optional | `server.port` | LLM endpoint port (omit if 443) |
-| Optional | `microsoft.a365.agent.thought.process` | Agent reasoning trace |
-
-### Span Type: `execute_tool`
-
-Child of `invoke_agent`. One per tool invocation.
-
-| Tier | Attribute | Expected Value |
-|------|-----------|----------------|
-| **Required** | `gen_ai.operation.name` | `"execute_tool"` |
-| **Required** | `microsoft.tenant.id` | Tenant GUID |
-| **Required** | `gen_ai.agent.id` | Agent GUID |
-| **Required** | `gen_ai.tool.name` | Tool function name |
-| Recommended | `gen_ai.tool.call.id` | Tool call ID from LLM response |
-| Recommended | `gen_ai.tool.call.arguments` | JSON-serialized arguments |
-| Recommended | `gen_ai.tool.call.result` | JSON-serialized result |
-| Recommended | `gen_ai.conversation.id` | Conversation identifier |
-| Optional | `gen_ai.tool.type` | Tool type (e.g. `"function"`) |
-| Optional | `gen_ai.tool.description` | Tool description |
-| Optional | `server.address` | Server hostname |
-
-### Resource Attributes (on TracerProvider)
-
-| Tier | Attribute | Expected Value |
-|------|-----------|----------------|
-| **Required** | `service.name` | Your service/agent name |
-| Recommended | `service.namespace` | Your service namespace |
-
-### SDK-Identifying Attributes (on all spans)
-
-These are optional but help the backend identify the telemetry source:
-
-| Attribute | Value |
-|-----------|-------|
-| `telemetry.sdk.name` | `"A365ObservabilitySDK"` (or your own identifier) |
-| `telemetry.sdk.language` | `"python"` |
-| `telemetry.sdk.version` | Your version string |
-
-## Export Protocol
-
-### Endpoint
-
-```
-POST https://agent365.svc.cloud.microsoft/observability/tenants/{tenantId}/otlp/agents/{agentId}/traces?api-version=1
-```
-
-Where `{tenantId}` and `{agentId}` come from the span attributes `microsoft.tenant.id` and `gen_ai.agent.id`.
-
-### Authentication
-
-```
-Authorization: Bearer <token>
-Content-Type: application/json
-```
-
-Token is obtained from a resolver function with signature: `(agent_id: str, tenant_id: str) -> str`
-
-The guide will document the interface but not prescribe a specific token acquisition method (MSAL, managed identity, etc.) since that depends on the deployment environment.
-
-### Payload Format
-
-OTLP-like JSON (not standard OTLP protobuf):
-
-```json
-{
-  "resourceSpans": [
-    {
-      "resource": {
-        "attributes": { "service.name": "my-agent", "service.namespace": "my-ns" }
-      },
-      "scopeSpans": [
-        {
-          "scope": { "name": "my-instrumentor", "version": "1.0.0" },
-          "spans": [
-            {
-              "traceId": "0af7651916cd43dd8448eb211c80319c",
-              "spanId": "b7ad6b7169203331",
-              "parentSpanId": null,
-              "name": "invoke_agent my-agent",
-              "kind": "INTERNAL",
-              "startTimeUnixNano": 1716000000000000000,
-              "endTimeUnixNano": 1716000001000000000,
-              "attributes": { "gen_ai.operation.name": "invoke_agent", "..." : "..." },
-              "events": null,
-              "links": null,
-              "status": { "code": "OK", "message": "" }
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
-```
-
-### Constraints
-
-| Constraint | Value | Behavior on violation |
-|------------|-------|----------------------|
-| Max payload size | ~900,000 bytes | Split into multiple POSTs (chunks) |
-| Max individual span size | 250,000 bytes | Largest attributes truncated to `"TRUNCATED"` |
-| Required span filter | `gen_ai.operation.name` ∈ `{invoke_agent, execute_tool, chat, Chat}` | Spans with other values are silently dropped |
-| Required identity | Both `microsoft.tenant.id` and `gen_ai.agent.id` present and non-empty | Spans without both are silently dropped |
-| Retryable HTTP codes | 408, 429, 5xx | Retry with exponential backoff (respect `Retry-After` for 429) |
-| Non-retryable HTTP codes | Other 4xx | Fail immediately |
-
-### Span Name Convention
-
-| Span type | Span name format |
-|-----------|-----------------|
-| invoke_agent | `"invoke_agent"` or `"invoke_agent <agent_name>"` |
-| inference | `"<operation> <model>"` (e.g. `"Chat gpt-4o"`) |
-| execute_tool | `"execute_tool <tool_name>"` |
-
-## Examples Plan
-
-### Example 1: Minimal invoke_agent span
-
-Creates a single root span with only required attributes, exports to `ConsoleSpanExporter` for verification.
-
-### Example 2: Full agent turn with hierarchy
-
-Creates `invoke_agent` → `inference` + `execute_tool` children with all recommended attributes. Still uses console export.
-
-### Example 3: DIY export to Agent 365 backend
-
-Implements a minimal custom `SpanExporter` that builds the JSON envelope and POSTs to the A365 endpoint with Bearer auth. Shows the complete flow from span creation to backend ingestion without any A365 package.
-
-### Example 4: End-to-end agent loop
-
-Combines examples 2 + 3 into a realistic agent loop: receive user message → invoke_agent span → call OpenAI (inference span) → execute tool (execute_tool span) → export to A365.
-
-## Validation & Troubleshooting
-
-The guide will include:
-- How to verify spans appear in the A365 portal after export
-- Common HTTP error codes and what they mean
-- Checklist: "My spans aren't showing up" (missing required attrs, wrong operation name, auth failure, payload too large)
-
-## Key Design Decisions
-
-1. **Zero A365 package dependency** — only `opentelemetry-sdk` and `requests` required
-2. **Tiered attribute contract** — Required (backend drops without) / Recommended (enables features) / Optional (enrichment)
-3. **Document the allow-list explicitly** — spans with `gen_ai.operation.name` not in the set are filtered
-4. **Token resolver interface documented, not implementation** — users bring their own auth
-5. **Custom exporter example, not OTLPSpanExporter** — A365 backend uses a custom JSON format, not standard OTLP
-6. **Versioning caveat** — the guide will note that the payload format is a contract that may evolve; the SDK handles this automatically and is the recommended path for production
-
-## Out of Scope
-
-- Token acquisition implementation (MSAL, managed identity, etc.)
-- Multi-language support (future work)
-- Baggage propagation (SDK-specific concern, not needed for manual spans)
-- The `_EnrichingBatchSpanProcessor` enrichment pattern (SDK internal)

From 3d56187c82aa7b76a34b28db05dc2f50afb7425e Mon Sep 17 00:00:00 2001
From: Julio Menendez Gonzalez <juliome@microsoft.com>
Date: Tue, 19 May 2026 10:48:37 -0600
Subject: [PATCH 11/11] fix: address adversarial review findings in manual
 instrumentation guide

- Add Agent365.Observability.OtelWrite auth scope requirement
- Add agent-ID-must-match-token constraint documentation
- Add output_messages operation type and span section
- Add server.port and gen_ai.output.messages to attribute tables
- Fix max payload: document 1MB server limit (900KB SDK buffer)
- Add payload chunking helper and span truncation guidance
- Fix token resolver signature to str | None, handle None case
- Add links mapping to DIY exporter (was hardcoded None)
- Add _chunk_by_size method to exporter for large batches

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/manual-a365-span-instrumentation.md | 155 +++++++++++++++++++----
 1 file changed, 131 insertions(+), 24 deletions(-)

diff --git a/docs/manual-a365-span-instrumentation.md b/docs/manual-a365-span-instrumentation.md
index 0c6ceddf..067590f8 100644
--- a/docs/manual-a365-span-instrumentation.md
+++ b/docs/manual-a365-span-instrumentation.md
@@ -42,6 +42,7 @@ Only spans with one of these values pass the backend's ingest filter:
 | `TextCompletion` | Inference (text completion) |
 | `GenerateContent` | Inference (content generation) |
 | `execute_tool` | Tool execution |
+| `output_messages` | Output message recording (agent response to user) |
 
 ### `invoke_agent` span
 
@@ -69,6 +70,8 @@ The top-level span representing one user turn / agent invocation.
 | Optional | `microsoft.channel.name` | Channel (e.g. `"Teams"`, `"Webchat"`) | |
 | Optional | `microsoft.channel.link` | Channel URL | |
 | Optional | `gen_ai.input.messages` | JSON-serialized input messages | Can be large; may be truncated |
+| Optional | `gen_ai.output.messages` | JSON-serialized output messages | Agent's response; may be truncated |
+| Optional | `server.port` | Server port number | Omit if 443 |
 | Optional | `microsoft.a365.caller.agent.name` | Calling agent name | For agent-to-agent calls |
 | Optional | `microsoft.a365.caller.agent.id` | Calling agent GUID | For agent-to-agent calls |
 | Optional | `microsoft.a365.caller.agent.blueprint.id` | Calling agent blueprint | For agent-to-agent calls |
@@ -111,6 +114,20 @@ Child of `invoke_agent`. One per tool invocation.
 | Optional | `gen_ai.tool.type` | `"function"` | |
 | Optional | `gen_ai.tool.description` | Tool description | |
 | Optional | `server.address` | Server hostname | |
+| Optional | `server.port` | Server port number | Omit if 443 |
+
+### `output_messages` span
+
+Child of `invoke_agent`. Records the agent's final response to the user.
+
+| Tier | Attribute | Expected value | Notes |
+|------|-----------|----------------|-------|
+| **Required** | `gen_ai.operation.name` | `"output_messages"` | Must match exactly |
+| **Required** | `microsoft.tenant.id` | Tenant GUID | Same as parent |
+| **Required** | `gen_ai.agent.id` | Agent GUID | Same as parent |
+| Recommended | `gen_ai.output.messages` | JSON-serialized output messages | The agent's response |
+| Recommended | `gen_ai.conversation.id` | Conversation identifier | |
+| Optional | `gen_ai.agent.name` | Agent name | Same as parent |
 
 ### Resource attributes
 
@@ -308,15 +325,19 @@ Authorization: Bearer <token>
 Content-Type: application/json
 ```
 
+The token must be issued for an app registration that has the **`Agent365.Observability.OtelWrite`** application role (scope). Without this role, the backend returns `403 Forbidden`.
+
+> **Important:** The `gen_ai.agent.id` value in your span attributes **must match** the application identity in the Bearer token. The backend validates that the agent ID in the payload corresponds to the authenticated app. Mismatches result in `403 Forbidden`.
+
 The token is obtained from a **token resolver** — a function with signature:
 
 ```python
-def resolve_token(agent_id: str, tenant_id: str) -> str:
-    """Return a valid Bearer token for the given agent and tenant."""
+def resolve_token(agent_id: str, tenant_id: str) -> str | None:
+    """Return a valid Bearer token for the given agent and tenant, or None if unavailable."""
     ...
 ```
 
-How you implement this depends on your environment (MSAL client credentials, managed identity, etc.). The A365 SDK uses this same interface internally.
+If the token resolver returns `None`, the exporter should skip that batch and log a warning. How you implement the resolver itself depends on your environment (MSAL client credentials, managed identity, etc.). The A365 SDK uses this same interface internally.
 
 ### Payload format
 
@@ -396,12 +417,43 @@ The body is JSON with this structure:
 
 | Constraint | Value | Behavior |
 |------------|-------|----------|
-| Max payload size | ~900,000 bytes | Split spans across multiple POST requests |
-| Max individual span | 250,000 bytes | Largest attributes are replaced with `"TRUNCATED"` |
+| Max payload size (server limit) | 1,000,000 bytes | Requests exceeding 1 MB are rejected |
+| Recommended max payload | ~900,000 bytes | Conservative client-side target; leaves ~100 KB of headroom below the 1 MB server limit |
+| Max individual span | 250,000 bytes | Truncate largest attributes (see below) |
 | Retry on | 408, 429, 5xx | Exponential backoff; respect `Retry-After` header for 429 |
 | Fail on | Other 4xx | Non-retryable; check auth and payload format |
 | Timeout | 30 seconds | Per-request HTTP timeout |
 
+#### Payload chunking
+
+If a serialized batch exceeds ~900,000 bytes, split it into multiple POST requests. Each request must still respect the grouping requirement (same tenant + agent). A simple approach:
+
+```python
+def chunk_spans(spans: list[dict], max_bytes: int = 900_000) -> list[list[dict]]:
+    """Split serialized spans into chunks that fit within the payload limit."""
+    chunks = []
+    current_chunk = []
+    current_size = 0
+    overhead = 200  # approximate envelope overhead
+
+    for span in spans:
+        span_size = len(json.dumps(span, separators=(",", ":"), ensure_ascii=False).encode())
+        if current_chunk and current_size + span_size + overhead > max_bytes:
+            chunks.append(current_chunk)
+            current_chunk = []
+            current_size = 0
+        current_chunk.append(span)
+        current_size += span_size
+
+    if current_chunk:
+        chunks.append(current_chunk)
+    return chunks
+```
+
+#### Span truncation
+
+If a single span exceeds 250,000 bytes (typically due to large `gen_ai.input.messages` or `gen_ai.output.messages`), truncate the largest attribute values by replacing them with `"TRUNCATED"`. Prioritize keeping structural attributes intact and truncating message content first.
+
 ### Grouping requirement
 
 All spans in a single POST must share the same `microsoft.tenant.id` and `gen_ai.agent.id`. If your batch contains spans for multiple tenants or agents, partition them into separate requests.
@@ -425,10 +477,12 @@ logger = logging.getLogger(__name__)
 
 # Accepted operation names — spans with other values are filtered out
 ACCEPTED_OPERATIONS = frozenset({
-    "invoke_agent", "execute_tool", "chat", "Chat", "TextCompletion", "GenerateContent",
+    "invoke_agent", "execute_tool", "output_messages",
+    "chat", "Chat", "TextCompletion", "GenerateContent",
 })
 
 A365_ENDPOINT = "https://agent365.svc.cloud.microsoft"
+MAX_PAYLOAD_BYTES = 900_000
 MAX_RETRIES = 3
 HTTP_TIMEOUT = 30.0
 
@@ -439,7 +493,7 @@ class Agent365ManualExporter(SpanExporter):
     def __init__(self, token_resolver):
         """
         Args:
-            token_resolver: Callable(agent_id, tenant_id) -> bearer_token string.
+            token_resolver: Callable(agent_id, tenant_id) -> bearer_token string or None.
         """
         self._token_resolver = token_resolver
         self._session = requests.Session()
@@ -456,8 +510,6 @@ class Agent365ManualExporter(SpanExporter):
                 f"{A365_ENDPOINT}/observability/tenants/{tenant_id}"
                 f"/otlp/agents/{agent_id}/traces?api-version=1"
             )
-            payload = self._build_payload(group_spans)
-            body = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
 
             # Resolve auth token
             try:
@@ -467,13 +519,28 @@ class Agent365ManualExporter(SpanExporter):
                 any_failure = True
                 continue
 
+            if token is None:
+                logger.warning(
+                    f"Token resolver returned None for agent={agent_id}, "
+                    f"tenant={tenant_id}; skipping batch"
+                )
+                any_failure = True
+                continue
+
             headers = {
                 "content-type": "application/json",
                 "authorization": f"Bearer {token}",
             }
 
-            if not self._post_with_retries(url, body, headers):
-                any_failure = True
+            # Build payload and chunk if necessary
+            mapped_spans = [self._map_span(sp) for sp in group_spans]
+            chunks = self._chunk_by_size(mapped_spans)
+
+            for chunk in chunks:
+                payload = self._build_payload_from_mapped(group_spans[0], chunk)
+                body = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
+                if not self._post_with_retries(url, body, headers):
+                    any_failure = True
 
         return SpanExportResult.FAILURE if any_failure else SpanExportResult.SUCCESS
 
@@ -499,20 +566,22 @@ class Agent365ManualExporter(SpanExporter):
 
     def _build_payload(self, spans: Sequence[ReadableSpan]) -> dict:
         """Build the OTLP-like JSON envelope."""
-        # Get resource attributes from the first span
+        mapped = [self._map_span(sp) for sp in spans]
+        return self._build_payload_from_mapped(spans[0], mapped)  # spans[0] supplies the resource, so spans must be non-empty
+
+    def _build_payload_from_mapped(
+        self, reference_span: ReadableSpan, mapped_spans: list[dict]
+    ) -> dict:
+        """Build the OTLP-like JSON envelope from pre-mapped span dicts."""
         resource_attrs = {}
-        if spans and spans[0].resource:
-            resource_attrs = dict(spans[0].resource.attributes)
+        if reference_span.resource:
+            resource_attrs = dict(reference_span.resource.attributes)
 
         # Group spans by instrumentation scope
         scope_map: dict[tuple[str, str | None], list[dict]] = {}
-        for sp in spans:
-            scope = sp.instrumentation_scope
-            scope_name = scope.name if scope else "unknown"
-            scope_version = scope.version if scope else None
-            scope_map.setdefault((scope_name, scope_version), []).append(
-                self._map_span(sp)
-            )
+        for sp_dict in mapped_spans:
+            # Use a default scope since mapped dicts don't carry scope info
+            scope_map.setdefault(("manual", None), []).append(sp_dict)
 
         scope_spans = [
             {"scope": {"name": name, "version": version}, "spans": mapped}
@@ -528,6 +597,31 @@ class Agent365ManualExporter(SpanExporter):
             ]
         }
 
+    @staticmethod
+    def _chunk_by_size(
+        mapped_spans: list[dict], max_bytes: int = MAX_PAYLOAD_BYTES
+    ) -> list[list[dict]]:
+        """Pack mapped spans, in order, into chunks that aim to fit within max_bytes."""
+        chunks: list[list[dict]] = []
+        current_chunk: list[dict] = []
+        current_size = 0  # serialized bytes of the spans in current_chunk
+        overhead = 200  # approximate envelope overhead in bytes (fixed estimate)
+
+        for span in mapped_spans:
+            span_size = len(  # byte length of the span's compact JSON encoding
+                json.dumps(span, separators=(",", ":"), ensure_ascii=False).encode()
+            )
+            if current_chunk and current_size + span_size + overhead > max_bytes:
+                chunks.append(current_chunk)  # limit would be exceeded: close the chunk
+                current_chunk = []
+                current_size = 0
+            current_chunk.append(span)  # an oversized span still gets its own chunk
+            current_size += span_size  # separators between spans are not counted
+
+        if current_chunk:
+            chunks.append(current_chunk)
+        return chunks if chunks else [[]]  # empty input yields a single empty chunk
+
     @staticmethod
     def _map_span(sp: ReadableSpan) -> dict:
         """Convert a ReadableSpan to the A365 JSON format."""
@@ -552,6 +646,18 @@ class Agent365ManualExporter(SpanExporter):
                 for ev in sp.events
             ]
 
+        # Map links
+        links = None
+        if sp.links:
+            links = [
+                {
+                    "traceId": f"{link.context.trace_id:032x}",
+                    "spanId": f"{link.context.span_id:016x}",
+                    "attributes": dict(link.attributes) if link.attributes else None,
+                }
+                for link in sp.links
+            ]
+
         # Map status
         status_code = sp.status.status_code if sp.status else StatusCode.UNSET
         status = {
@@ -569,7 +675,7 @@ class Agent365ManualExporter(SpanExporter):
             "endTimeUnixNano": sp.end_time,
             "attributes": attrs or None,
             "events": events,
-            "links": None,
+            "links": links,
             "status": status,
         }
 
@@ -607,8 +713,9 @@ class Agent365ManualExporter(SpanExporter):
 ```python
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
 
-def my_token_resolver(agent_id: str, tenant_id: str) -> str:
+def my_token_resolver(agent_id: str, tenant_id: str) -> str | None:
     # Your token acquisition logic here (MSAL, managed identity, etc.)
+    # Return None if a token cannot be acquired; the exporter then skips the batch
     return "your-bearer-token"
 
 exporter = Agent365ManualExporter(token_resolver=my_token_resolver)
@@ -648,7 +755,7 @@ PROVIDER_NAME = "azure"
 SERVER_ADDRESS = "my-resource.openai.azure.com"
 
 
-def my_token_resolver(agent_id: str, tenant_id: str) -> str:
+def my_token_resolver(agent_id: str, tenant_id: str) -> str | None:
     """Replace with your actual token acquisition logic."""
     raise NotImplementedError("Implement your token resolver")