Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added agent/__init__.py
Empty file.
Empty file added agent/core/__init__.py
Empty file.
159 changes: 159 additions & 0 deletions agent/core/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from typing import TypedDict, Annotated, Literal
from langgraph.graph import StateGraph, END
from pymilvus import connections, Collection
from sentence_transformers import SentenceTransformer
import operator
import json
import datetime

# ── Connect to Milvus ────────────────────────────────────
# NOTE(review): both lines below run at import time — the connect opens a
# network session and SentenceTransformer downloads/loads model weights.
# Consider moving them behind an explicit init function for testability.
connections.connect("default", host="localhost", port="19530")
# Embedding model used for query vectors; presumably the same model the
# ingestion pipeline used to build the Milvus indexes — verify, since a
# mismatch silently degrades retrieval quality.
embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# ── State definition ─────────────────────────────────────
class AgentState(TypedDict):
    """Graph state threaded through every LangGraph node.

    Each node returns a partial dict that LangGraph merges into this state.
    """
    question: str   # the user's raw question (set once at invocation)
    route: str      # "docs" | "code" | "both" — set by router()
    chunks: list    # retrieved chunk dicts from search_index()
    citations: list # unique source URLs extracted from chunks
    answer: str     # final synthesized answer text, set by synthesize()
    messages: Annotated[list, operator.add]  # append-only log (operator.add reducer)

# ── Helper: search Milvus ────────────────────────────────
def search_index(collection_name: str, question: str, top_k: int = 3) -> list:
    """Embed *question* and run a cosine-similarity search on a Milvus collection.

    Returns up to *top_k* chunk dicts carrying the chunk text, source
    metadata (URL, h1/h2 section headers), and the rounded similarity score.
    """
    query_vector = embed_model.encode(question).tolist()
    coll = Collection(collection_name)
    coll.load()
    hits = coll.search(
        data=[query_vector],
        anns_field="vector",
        param={"metric_type": "COSINE", "params": {"nprobe": 10}},
        limit=top_k,
        output_fields=["content_text", "source_url", "h1", "h2"]
    )
    # Single query vector in, so only hits[0] is populated.
    return [
        {
            "text": hit.entity.get("content_text", ""),
            "source_url": hit.entity.get("source_url", ""),
            "h1": hit.entity.get("h1", ""),
            "h2": hit.entity.get("h2", ""),
            "score": round(hit.score, 4)
        }
        for hit in hits[0]
    ]

# ── Node 1: Router ───────────────────────────────────────
def router(state: AgentState) -> dict:
    """Classify the question as a docs, code, or mixed ("both") query.

    Counts keyword hits for each category in the lowercased question.
    Ties — including zero hits on both sides — fall back to "both" so the
    query is still answered from some index. Every decision is appended to
    routing_logs.jsonl as future training data.

    Returns a partial state update: {"route": "docs" | "code" | "both"}.
    """
    q = state["question"].lower()

    code_keywords = [
        "yaml", "manifest", "crd", "deployment", "service",
        "bug", "error", "crash", "fix", "issue", "exception",
        "code", "function", "class", "api", "webhook", "config"
    ]
    doc_keywords = [
        "what is", "how does", "explain", "overview", "concept",
        "architecture", "introduction", "guide", "tutorial"
    ]

    code_score = sum(1 for w in code_keywords if w in q)
    doc_score = sum(1 for w in doc_keywords if w in q)

    if code_score > doc_score:
        route = "code"
    elif doc_score > code_score:
        route = "docs"
    else:
        route = "both"

    # Emit routing log — future training data.
    # Fix: datetime.datetime.utcnow() is deprecated (Python 3.12) and yields
    # a naive timestamp; use an explicit timezone-aware UTC stamp instead.
    log = {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "question": state["question"],
        "route": route,
        "doc_score": doc_score,
        "code_score": code_score
    }
    # Pin the encoding so log bytes are stable across platforms.
    with open("routing_logs.jsonl", "a", encoding="utf-8") as f:
        f.write(json.dumps(log) + "\n")

    print(f"[Router] route={route} doc_score={doc_score} code_score={code_score}")
    return {"route": route}

# ── Routing function ─────────────────────────────────────
def decide_tool(state: AgentState) -> Literal["query_docs", "query_code", "query_both"]:
    """Map the router's decision onto the matching tool-node name.

    Raises KeyError if state["route"] is not one of the known routes.
    """
    node_by_route = {
        "docs": "query_docs",
        "code": "query_code",
        "both": "query_both"
    }
    return node_by_route[state["route"]]

# ── Node 2a: Query docs index ────────────────────────────
def query_docs(state: AgentState) -> dict:
    """Retrieve chunks from the documentation index and dedupe citation URLs."""
    print("[Tool] searching docs_index...")
    retrieved = search_index("docs_index", state["question"])
    unique_urls = {c["source_url"] for c in retrieved if c["source_url"]}
    return {"chunks": retrieved, "citations": list(unique_urls)}

# ── Node 2b: Query code index ────────────────────────────
def query_code(state: AgentState) -> dict:
    """Retrieve chunks from the code index and dedupe citation URLs."""
    print("[Tool] searching code_index...")
    retrieved = search_index("code_index", state["question"])
    unique_urls = {c["source_url"] for c in retrieved if c["source_url"]}
    return {"chunks": retrieved, "citations": list(unique_urls)}

# ── Node 2c: Query both indexes ──────────────────────────
def query_both(state: AgentState) -> dict:
    """Retrieve from both indexes (2 chunks each: docs first, then code)."""
    print("[Tool] searching both indexes...")
    combined = (
        search_index("docs_index", state["question"], top_k=2)
        + search_index("code_index", state["question"], top_k=2)
    )
    unique_urls = {c["source_url"] for c in combined if c["source_url"]}
    return {"chunks": combined, "citations": list(unique_urls)}

# ── Node 3: Synthesize ───────────────────────────────────
def synthesize(state: AgentState) -> dict:
    """Assemble the final answer text from retrieved chunks and citations.

    Returns a partial state update: {"answer": str}.
    """
    sections = []
    for chunk in state["chunks"]:
        sections.append(
            f"Source: {chunk['source_url']}\nSection: {chunk['h1']} > {chunk['h2']}\n{chunk['text']}"
        )
    context = "\n\n".join(sections)

    # Stub — Phase 2 Part 3 replaces this with real LLM call
    header = (
        f"Based on {len(state['chunks'])} retrieved chunks "
        f"from the {state['route']} index:\n\n"
    )
    # Context is truncated to 800 chars to keep the stub answer short.
    footer = f"Citations: {', '.join(state['citations'])}"
    return {"answer": header + context[:800] + "\n\n" + footer}

# ── Build the graph ──────────────────────────────────────
def build_agent():
    """Wire router → (conditional tool node) → synthesize and compile the graph."""
    builder = StateGraph(AgentState)

    # Register every node, then set the router as the entry point.
    nodes = {
        "router": router,
        "query_docs": query_docs,
        "query_code": query_code,
        "query_both": query_both,
        "synthesize": synthesize,
    }
    for node_name, node_fn in nodes.items():
        builder.add_node(node_name, node_fn)

    builder.set_entry_point("router")

    # decide_tool returns the node name directly, so the mapping is identity.
    tool_nodes = ("query_docs", "query_code", "query_both")
    builder.add_conditional_edges(
        "router",
        decide_tool,
        {name: name for name in tool_nodes}
    )

    # All tool nodes converge on synthesize, which ends the run.
    for tool_node in tool_nodes:
        builder.add_edge(tool_node, "synthesize")
    builder.add_edge("synthesize", END)

    return builder.compile()

agent = build_agent()
51 changes: 51 additions & 0 deletions agent/kserve/agent-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Deployment for the docs-agent MCP server, plus its ClusterIP Service.
# NOTE(review): indentation reconstructed from a flattened extract — diff
# against the original file before merging.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docs-agent-server
  namespace: docs-agent
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docs-agent-server
  template:
    metadata:
      labels:
        app: docs-agent-server
    spec:
      containers:
        - name: server
          # NOTE(review): :latest is not reproducible — pin a digest or version tag.
          image: thadev14/docs-agent:latest
          ports:
            - containerPort: 8000
          env:
            - name: MILVUS_HOST
              value: "milvus-standalone.docs-agent.svc.cluster.local"
            - name: MILVUS_PORT
              value: "19530"
            - name: AGENT_BACKEND
              value: "langgraph"
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 10
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
---
# Service exposing the MCP endpoints consumed by the kagent Agent CRD.
apiVersion: v1
kind: Service
metadata:
  name: docs-agent-mcp-service
  namespace: docs-agent
spec:
  selector:
    app: docs-agent-server
  ports:
    - port: 8000
      targetPort: 8000
25 changes: 25 additions & 0 deletions agent/kserve/embedding-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# KServe InferenceService hosting the sentence-transformers embedding model.
# scaleToZero is disabled so query-time embedding has no cold-start latency.
# NOTE(review): indentation reconstructed from a flattened extract — diff
# against the original file before merging.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: embedding-service
  namespace: docs-agent
  annotations:
    autoscaling.knative.dev/scaleToZero: "false"
spec:
  predictor:
    minReplicas: 1
    maxReplicas: 2
    model:
      modelFormat:
        name: huggingface
      runtime: llm-runtime
      args:
        - --model_id=sentence-transformers/all-mpnet-base-v2
        - --backend=huggingface
      resources:
        requests:
          cpu: "2"
          memory: "4Gi"
        limits:
          cpu: "4"
          memory: "8Gi"
37 changes: 37 additions & 0 deletions agent/kserve/kagent-crd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# kagent Agent CRD: declares the docs assistant and its three MCP tools,
# each backed by an endpoint on docs-agent-mcp-service.
# NOTE(review): indentation reconstructed from a flattened extract — diff
# against the original file before merging.
apiVersion: kagent.dev/v1alpha1
kind: Agent
metadata:
  name: kubeflow-docs-agent
  namespace: docs-agent
spec:
  description: >
    Agentic RAG assistant for Kubeflow documentation and code.
    Routes queries to docs_index or code_index based on intent.
    Implements Thin Context MCP pattern per gsoc2026_agentic_rag.md.
  systemPrompt: |
    You are the Kubeflow Documentation Assistant.
    You have access to two knowledge sources:
    - Kubeflow official documentation (conceptual questions)
    - Kubeflow manifests codebase (YAML configs, debugging)
    Always return cited answers with source URLs.
    Keep responses concise and technically accurate.
  modelConfig:
    apiKeySecretRef:
      name: llm-secret
      key: apiKey
  tools:
    - name: query_docs
      description: Search Kubeflow documentation index
      type: McpServer
      mcpServer:
        url: http://docs-agent-mcp-service:8000/mcp/query_docs
    - name: query_code
      description: Search Kubeflow manifests code index
      type: McpServer
      mcpServer:
        url: http://docs-agent-mcp-service:8000/mcp/query_code
    - name: query_both
      description: Search both docs and code indexes
      type: McpServer
      mcpServer:
        url: http://docs-agent-mcp-service:8000/mcp/query_both
40 changes: 40 additions & 0 deletions agent/kserve/llm-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# KServe InferenceService for the Llama 3.1 8B chat model on vLLM.
# Scales to zero after 5m idle to free the GPU; tool calling enabled via
# vLLM's llama3_json parser.
# NOTE(review): indentation reconstructed from a flattened extract — diff
# against the original file before merging.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: llama-service
  namespace: docs-agent
  annotations:
    autoscaling.knative.dev/scaleToZero: "true"
    autoscaling.knative.dev/scaleToZeroPodRetentionPeriod: "5m"
    autoscaling.knative.dev/initialScale: "1"
    autoscaling.knative.dev/scaleUpDelay: "0s"
spec:
  predictor:
    minReplicas: 0
    maxReplicas: 1
    model:
      modelFormat:
        name: huggingface
      runtime: llm-runtime
      args:
        - --model_id=RedHatAI/Llama-3.1-8B-Instruct
        - --backend=vllm
        - --max-model-len=32768
        - --gpu-memory-utilization=0.90
        - --enable-auto-tool-choice
        - --tool-call-parser=llama3_json
      env:
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              name: huggingface-secret
              key: token
      resources:
        requests:
          cpu: "4"
          memory: "16Gi"
          nvidia.com/gpu: "1"
        limits:
          cpu: "6"
          memory: "24Gi"
          nvidia.com/gpu: "1"
Loading