22 changes: 22 additions & 0 deletions .env.example
@@ -0,0 +1,22 @@
# Milvus Vector Database
MILVUS_HOST=localhost
MILVUS_PORT=19530
MILVUS_COLLECTION=docs_rag
MILVUS_VECTOR_FIELD=vector

# LLM Configuration
# For local development with Ollama:
KSERVE_URL=http://localhost:11434/v1/chat/completions
MODEL=llama3.1:8b
# For production with KServe:
# KSERVE_URL=http://llama.docs-agent.svc.cluster.local/openai/v1/chat/completions
# MODEL=llama3.1-8B

# Embedding Model
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2

# API Server
PORT=8000

# GitHub Token (optional - increases API rate limit)
# GITHUB_TOKEN=ghp_your_token_here
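The server can pick these variables up with plain `os.environ` lookups. A minimal sketch of such a loader — the `Settings` class and function name are illustrative, not the PR's actual code; only the variable names and defaults come from `.env.example`:

```python
import os
from dataclasses import dataclass


@dataclass
class Settings:
    """Illustrative container for the variables defined in .env.example."""
    milvus_host: str
    milvus_port: int
    kserve_url: str
    model: str


def load_settings(env=os.environ) -> Settings:
    # Defaults mirror the local-development values above.
    return Settings(
        milvus_host=env.get("MILVUS_HOST", "localhost"),
        milvus_port=int(env.get("MILVUS_PORT", "19530")),
        kserve_url=env.get("KSERVE_URL", "http://localhost:11434/v1/chat/completions"),
        model=env.get("MODEL", "llama3.1:8b"),
    )
```

Passing the environment as a parameter keeps the loader testable without mutating `os.environ`.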
25 changes: 25 additions & 0 deletions Dockerfile
@@ -0,0 +1,25 @@
FROM python:3.11-slim

# Create non-root user
RUN useradd -m -u 1000 appuser
WORKDIR /app

# Install server dependencies
COPY server-https/requirements.txt ./requirements-server.txt
RUN pip install --no-cache-dir -r requirements-server.txt

# Install ingestion dependencies
RUN pip install --no-cache-dir requests langchain-text-splitters

# Environment variables
ENV PORT=8000

# Copy server and ingestion script
COPY --chown=appuser:appuser server-https/app.py /app/
COPY --chown=appuser:appuser scripts/local_ingest.py /app/

# Switch to non-root for running the app
USER appuser

EXPOSE 8000
CMD ["python", "-u", "app.py"]
106 changes: 105 additions & 1 deletion README.md
@@ -9,11 +9,13 @@ The official LLM implementation of the Kubeflow Documentation Assistant powered
- [Overview](#overview)
- [Architecture](#architecture)
- [Prerequisites](#prerequisites)
- [Local Development Setup](#local-development-setup)
- [Installation (Kubernetes)](#installation)
- [Milvus Vector Database](#milvus-vector-database)
- [KServe Inference Service](#kserve-inference-service)
- [Kubeflow Pipelines](#kubeflow-pipelines)
- [API Server](#api-server)
- [Local vs Production](#local-vs-production)
- [Usage](#usage)
- [Configuration](#configuration)
- [Troubleshooting](#troubleshooting)
@@ -58,8 +60,97 @@ Kubeflow users often struggle to find relevant information across the extensive
- GPU nodes (for LLM inference)
- SSL certificate (for HTTPS API)

## Local Development Setup

For development without a Kubernetes cluster, you can run the full stack locally with Docker Compose.

### Prerequisites

- [Docker Desktop](https://www.docker.com/products/docker-desktop/)
- [Ollama](https://ollama.ai/) (for local LLM inference)

### 1. Install Ollama

Ollama runs the Llama 3.1 model locally on your machine.

**macOS:**
```bash
brew install ollama
```

**Linux:**
```bash
curl -fsSL https://ollama.com/install.sh | sh
```

Start the Ollama server (it runs in the foreground, so leave it open in a separate terminal), then pull the model:
```bash
ollama serve
ollama pull llama3.1:8b
```

### 2. Clone and Configure

```bash
git clone https://github.com/kubeflow/docs-agent.git
cd docs-agent
cp .env.example .env
```

### 3. Start All Services

```bash
docker-compose up --build -d
```

This starts:
- **Milvus** — vector database (port `19530`)
- **etcd** — metadata storage for Milvus
- **MinIO** — object storage for Milvus
- **API Server** — FastAPI server (port `8000`) with hot reload

> **Note:** The `--build` flag is only needed on the first run or after changing the Dockerfile/requirements. For subsequent runs, use `docker-compose up -d`.

Verify all services are running:
```bash
docker-compose ps
```

### 4. Ingest Documentation

Populate Milvus with Kubeflow documentation:
```bash
docker exec docs-agent-api python local_ingest.py
```

This is a one-time step. Re-run only when documentation is updated.
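Under the hood, an ingestion script like `local_ingest.py` typically fetches pages, splits them into overlapping chunks, embeds each chunk, and inserts the vectors into Milvus. A hedged sketch of the chunking step alone — the function name and sizes are illustrative; the real script relies on `langchain-text-splitters`:

```python
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> list[str]:
    """Split text into overlapping fixed-size chunks, roughly what a
    recursive character splitter produces (sizes are illustrative)."""
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        if start + chunk_size >= len(text):
            break  # final chunk reached the end of the text
        start += chunk_size - overlap  # step back by the overlap
    return chunks
```

The overlap ensures a sentence cut at a chunk boundary still appears whole in the neighboring chunk, which improves retrieval recall.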

### 5. Test

```bash
curl -X POST http://localhost:8000/chat \
-H "Content-Type: application/json" \
-d '{"message": "What is Kubeflow?", "stream": false}'
```
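The same request can be issued from Python when scripting tests; a minimal stdlib-only sketch (the `ask` helper is illustrative — only the `/chat` endpoint and payload shape come from the example above):

```python
import json
import urllib.request


def build_chat_request(message: str, stream: bool = False) -> dict:
    # Matches the payload accepted by the /chat endpoint above.
    return {"message": message, "stream": stream}


def ask(message: str, base_url: str = "http://localhost:8000") -> dict:
    body = json.dumps(build_chat_request(message)).encode()
    req = urllib.request.Request(
        f"{base_url}/chat",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)


if __name__ == "__main__":
    # Requires the stack from step 3 to be running.
    print(ask("What is Kubeflow?"))
```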

### Docker Compose Commands

| Command | Description |
|---------|-------------|
| `docker-compose up -d` | Start all services in background |
| `docker-compose up --build -d` | Rebuild and start (after Dockerfile/requirements change) |
| `docker-compose ps` | Check service status |
| `docker-compose logs -f` | View logs |
| `docker-compose logs -f api-server` | View API server logs only |
| `docker-compose down` | Stop all services |
| `docker-compose down -v` | Stop and remove all data |

---

## Installation

The following instructions are for deploying to a **Kubernetes cluster** in production.

### Milvus Vector Database

#### What is Milvus?
@@ -536,6 +627,19 @@ if data.get('citations'):
print(f"Sources: {data['citations']}")
```

## Local vs Production

| Component | Local Development | Production (Kubernetes) |
|-----------|------------------|------------------------|
| **Vector DB** | Docker Compose (Milvus standalone) | Helm chart on Kubernetes |
| **LLM** | Ollama (runs on CPU/Apple Silicon) | KServe + vLLM (NVIDIA GPU) |
| **Data Ingestion** | `scripts/local_ingest.py` | Kubeflow Pipelines |
| **API Server** | `python server-https/app.py` | Kubernetes Deployment |
| **Service Mesh** | Not needed | Istio (mTLS + RBAC) |
| **SSL** | Plain HTTP | HTTPS with certificates |

Both setups expose the same API format; the local setup simply swaps in lightweight alternatives that don't require Kubernetes or GPUs, so responses may differ slightly between the Ollama- and vLLM-served models.
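Because the two environments differ only in configuration, you can repoint the local stack without editing `docker-compose.yml` — for example with a `docker-compose.override.yml` (an illustrative file, not part of this PR):

```yaml
# docker-compose.override.yml (illustrative) — Compose merges this
# over docker-compose.yml automatically on `docker-compose up`.
services:
  api-server:
    environment:
      MODEL: llama3.1:70b   # any model already pulled into Ollama
```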

## Configuration

### Environment Variables
86 changes: 86 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,86 @@
services:
etcd:
image: quay.io/coreos/etcd:v3.5.18
container_name: milvus-etcd
environment:
- ETCD_AUTO_COMPACTION_MODE=revision
- ETCD_AUTO_COMPACTION_RETENTION=1000
- ETCD_QUOTA_BACKEND_BYTES=4294967296
- ETCD_SNAPSHOT_COUNT=50000
volumes:
- etcd_data:/etcd
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
healthcheck:
test: ["CMD", "etcdctl", "endpoint", "health"]
interval: 30s
timeout: 20s
retries: 3

minio:
image: minio/minio:RELEASE.2023-03-20T20-16-18Z
container_name: milvus-minio
environment:
MINIO_ACCESS_KEY: minioadmin
MINIO_SECRET_KEY: minioadmin
ports:
- "9001:9001"
volumes:
- minio_data:/minio_data
command: minio server /minio_data --console-address ":9001"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3

milvus:
image: milvusdb/milvus:v2.4.17
container_name: milvus-standalone
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "19530:19530"
- "9091:9091"
volumes:
- milvus_data:/var/lib/milvus
command: ["milvus", "run", "standalone"]
depends_on:
etcd:
condition: service_healthy
minio:
condition: service_healthy
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
interval: 30s
timeout: 20s
retries: 3

api-server:
build:
context: .
dockerfile: Dockerfile
container_name: docs-agent-api
ports:
- "8000:8000"
environment:
MILVUS_HOST: milvus
MILVUS_PORT: "19530"
MILVUS_COLLECTION: docs_rag
MILVUS_VECTOR_FIELD: vector
KSERVE_URL: http://host.docker.internal:11434/v1/chat/completions
MODEL: llama3.1:8b
EMBEDDING_MODEL: sentence-transformers/all-mpnet-base-v2
PORT: "8000"
volumes:
- ./server-https/app.py:/app/app.py
- ./scripts/local_ingest.py:/app/local_ingest.py
command: ["python", "-u", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
depends_on:
milvus:
condition: service_healthy

volumes:
etcd_data:
minio_data:
milvus_data: