From 349c610450b43cef075644df3d894e8c0fba68fc Mon Sep 17 00:00:00 2001
From: Jonas Thamane <166150947+NathiJonas@users.noreply.github.com>
Date: Sun, 8 Mar 2026 23:54:11 +0200
Subject: [PATCH 1/2] Create Jonas Thamane Week 5 PR.ipynb
---
.../Jonas Thamane Week 5 PR.ipynb | 2764 +++++++++++++++++
1 file changed, 2764 insertions(+)
create mode 100644 community-contributions/Jonas Thamane Week 5 PR.ipynb
diff --git a/community-contributions/Jonas Thamane Week 5 PR.ipynb b/community-contributions/Jonas Thamane Week 5 PR.ipynb
new file mode 100644
index 0000000000..de4141564d
--- /dev/null
+++ b/community-contributions/Jonas Thamane Week 5 PR.ipynb
@@ -0,0 +1,2764 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "169f472e",
+ "metadata": {},
+ "source": [
+ "# Week 5 Exercise — Personal Information Processor with RAG\n",
+ "\n",
+ "A complete RAG pipeline rebuilt with the **Anthropic Claude API**.\n",
+ "\n",
+ "### Features\n",
+ "- Document loading from multiple folders (Markdown files)\n",
+ "- Intelligent text chunking with overlap\n",
+ "- Vector embeddings generated by prompting Claude (128-dimensional, L2-normalised)\n",
+ "- ChromaDB vector store for efficient retrieval\n",
+ "- t-SNE visualization (2D and 3D)\n",
+ "- Conversational RAG with memory\n",
+ "- Gradio chat interface\n",
+ "- Source attribution in answers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9df19ead",
+ "metadata": {},
+ "source": [
+ "## 1. Install Dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ee0830c0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "[notice] A new release of pip is available: 24.0 -> 26.0.1\n",
+ "[notice] To update, run: c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Scripts\\python.exe -m pip install --upgrade pip\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sys\n",
+ "!{sys.executable} -m pip install -q gradio anthropic chromadb python-dotenv numpy plotly scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b00f5a61",
+ "metadata": {},
+ "source": [
+ "## 2. Setup and Configuration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd5a77b7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Anthropic API Key found: sk-ant-api03-me...\n",
+ "✅ Configuration ready\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import re\n",
+ "import uuid\n",
+ "import json\n",
+ "import glob\n",
+ "import textwrap\n",
+ "from pathlib import Path\n",
+ "from typing import Optional\n",
+ "\n",
+ "import numpy as np\n",
+ "import plotly.graph_objects as go\n",
+ "from sklearn.manifold import TSNE\n",
+ "import gradio as gr\n",
+ "import anthropic\n",
+ "import chromadb\n",
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "\n",
+ "\n",
+ "ANTHROPIC_MODEL = \"claude-sonnet-4-20250514\"\n",
+ "DB_NAME = \"personal_knowledge_db\"\n",
+ "CHUNK_SIZE = 500 # characters per chunk\n",
+ "CHUNK_OVERLAP = 100 # character overlap between chunks\n",
+ "TOP_K_RESULTS = 5 # chunks to retrieve per query\n",
+ "KNOWLEDGE_BASE_PATH = Path(\"knowledge_base\")\n",
+ "\n",
+ "api_key = os.getenv(\"ANTHROPIC_API_KEY\", \"\")\n",
+ "if api_key:\n",
+ " print(f\"Anthropic API Key found: {api_key[:15]}...\")\n",
+ "else:\n",
+ " print(\"⚠️ ANTHROPIC_API_KEY not set — add it to a .env file or set os.environ directly\")\n",
+ "\n",
+ "print(\"✅ Configuration ready\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c2bf47d",
+ "metadata": {},
+ "source": [
+ "## 3. Global State"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "636ae5d6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Global state initialised\n"
+ ]
+ }
+ ],
+ "source": [
+ "_client: Optional[anthropic.Anthropic] = None\n",
+ "_chroma: Optional[chromadb.Client] = None\n",
+ "_collection: Optional[chromadb.Collection] = None\n",
+ "_chat_history: list[dict] = [] \n",
+ "\n",
+ "\n",
+ "def get_client() -> anthropic.Anthropic:\n",
+ " global _client\n",
+ " if _client is None:\n",
+ " key = os.getenv(\"ANTHROPIC_API_KEY\", \"\")\n",
+ " if not key:\n",
+ " raise ValueError(\"ANTHROPIC_API_KEY not set\")\n",
+ " _client = anthropic.Anthropic(api_key=key)\n",
+ " return _client\n",
+ "\n",
+ "\n",
+ "print(\"✅ Global state initialised\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "211bf388",
+ "metadata": {},
+ "source": [
+ "## 4. Sample Knowledge Base"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "ea9c4f6f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Exists: knowledge_base\\personal\\profile.md\n",
+ " Exists: knowledge_base\\projects\\portfolio.md\n",
+ " Exists: knowledge_base\\learning\\journey.md\n",
+ "\n",
+ "✅ Knowledge base ready at: C:\\Users\\Lenovo\\Downloads\\knowledge_base\n"
+ ]
+ }
+ ],
+ "source": [
+ "def create_sample_knowledge_base():\n",
+ " \"\"\"Create sample knowledge base with personal, projects, and learning data.\"\"\"\n",
+ "\n",
+ " for folder in [\"personal\", \"projects\", \"learning\"]:\n",
+ " (KNOWLEDGE_BASE_PATH / folder).mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ " files = {\n",
+ " \"personal/profile.md\": \"\"\"\n",
+ "# Personal Profile\n",
+ "\n",
+ "## About Me\n",
+ "Name: Alex Johnson\n",
+ "Role: Software Engineer & AI Enthusiast\n",
+ "Location: Tech Hub City\n",
+ "\n",
+ "## Background\n",
+ "I am a passionate software engineer with over 5 years of experience building scalable applications.\n",
+ "My journey started with web development and has evolved into specialising in AI and machine learning.\n",
+ "I completed my Computer Science degree at State University and have since worked at two startups.\n",
+ "\n",
+ "## Skills\n",
+ "- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n",
+ "- Frameworks: React, FastAPI, LangChain, Gradio\n",
+ "- AI/ML: LLMs, RAG systems, Vector Databases, Prompt Engineering, Fine-tuning\n",
+ "- Databases: PostgreSQL, MongoDB, Chroma, Pinecone, Redis\n",
+ "- Cloud: AWS, GCP, Docker, Kubernetes\n",
+ "\n",
+ "## Interests\n",
+ "I love exploring new technologies, contributing to open-source projects, and mentoring aspiring developers.\n",
+ "In my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\n",
+ "I run a small tech blog with 2,000 monthly readers.\n",
+ "\n",
+ "## Contact\n",
+ "GitHub: github.com/alexj\n",
+ "LinkedIn: linkedin.com/in/alexjohnson\n",
+ "Email: alex@techmail.com\n",
+ "\"\"\",\n",
+ "\n",
+ " \"projects/portfolio.md\": \"\"\"\n",
+ "# Projects Portfolio\n",
+ "\n",
+ "## AI-Powered Document Assistant\n",
+ "A RAG-based system that helps users query large document collections efficiently.\n",
+ "Tech Stack: Python, LangChain, Chroma, Anthropic Claude API\n",
+ "Status: Shipped — used by 300+ beta users\n",
+ "Key Features: Semantic search, multi-document support, conversation history, source attribution\n",
+ "Lessons Learned: Chunking strategy matters enormously; smaller overlapping chunks outperform large ones.\n",
+ "\n",
+ "## Real-time Analytics Dashboard\n",
+ "Built a scalable dashboard for visualising business metrics in real-time.\n",
+ "Tech Stack: React, Node.js, PostgreSQL, Redis, WebSockets\n",
+ "Impact: Reduced reporting time by 80% for the operations team.\n",
+ "Challenges: Handling 10,000 concurrent WebSocket connections required careful connection pooling.\n",
+ "\n",
+ "## Code Review Automation Tool\n",
+ "An AI assistant that provides automated code reviews and suggestions.\n",
+ "Tech Stack: Python, GitHub API, Claude API\n",
+ "Features: Pattern detection, best practices recommendations, security vulnerability scanning\n",
+ "Status: Open-sourced on GitHub with 450 stars\n",
+ "\n",
+ "## Personal Finance Tracker\n",
+ "Full-stack application for tracking expenses and predicting future spending patterns.\n",
+ "Tech Stack: React Native, FastAPI, PostgreSQL, ML forecasting\n",
+ "Features: Receipt scanning via OCR, budget alerts, spending trend analysis\n",
+ "\n",
+ "## E-commerce Recommendation Engine\n",
+ "Built a collaborative filtering engine for a mid-size online retailer.\n",
+ "Tech Stack: Python, PyTorch, FastAPI, Redis\n",
+ "Impact: 23% increase in average order value after deployment.\n",
+ "\"\"\",\n",
+ "\n",
+ " \"learning/journey.md\": \"\"\"\n",
+ "# Learning Journey\n",
+ "\n",
+ "## Currently Learning (2025)\n",
+ "- Advanced RAG techniques: reranking, hybrid search, query decomposition\n",
+ "- Anthropic's Claude API: tool use, streaming, multi-turn conversations\n",
+ "- Rust programming language: ownership model, async Tokio runtime\n",
+ "- System design: distributed systems, consensus algorithms\n",
+ "\n",
+ "## Completed Courses\n",
+ "- Deep Learning Specialisation — Coursera (Andrew Ng) — 2024\n",
+ "- Fullstack Open — University of Helsinki — 2022\n",
+ "- AWS Solutions Architect Associate — 2023\n",
+ "- Fast.ai Practical Deep Learning — 2024\n",
+ "\n",
+ "## Books Read\n",
+ "- \"Designing Data-Intensive Applications\" — Martin Kleppmann\n",
+ "- \"The Pragmatic Programmer\" — Hunt & Thomas\n",
+ "- \"Building Machine Learning Powered Applications\" — Emmanuel Ameisen\n",
+ "- \"Attention Is All You Need\" — Vaswani et al. (paper)\n",
+ "\n",
+ "## Certifications\n",
+ "- AWS Certified Solutions Architect — Associate (2023)\n",
+ "- Google Professional Data Engineer (2024)\n",
+ "- Certified Kubernetes Application Developer — CKAD (2023)\n",
+ "\n",
+ "## Learning Goals for 2026\n",
+ "- Contribute to a major open-source AI project\n",
+ "- Build and ship a SaaS product from scratch\n",
+ "- Complete a Rust systems project\n",
+ "- Publish 3 technical blog posts on RAG architecture\n",
+ "\n",
+ "## Study Schedule\n",
+ "Mornings (6-7am): Reading / papers\n",
+ "Lunch (12-1pm): Coding exercises\n",
+ "Evenings (8-9pm): Project work or courses\n",
+ "\"\"\"\n",
+ " }\n",
+ "\n",
+ " for path, content in files.items():\n",
+ " full_path = KNOWLEDGE_BASE_PATH / path\n",
+ " if not full_path.exists():\n",
+ " full_path.write_text(content.strip(), encoding=\"utf-8\")\n",
+ " print(f\" Created: {full_path}\")\n",
+ " else:\n",
+ " print(f\" Exists: {full_path}\")\n",
+ "\n",
+ " print(f\"\\n✅ Knowledge base ready at: {KNOWLEDGE_BASE_PATH.resolve()}\")\n",
+ "\n",
+ "\n",
+ "create_sample_knowledge_base()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f2de26a5",
+ "metadata": {},
+ "source": [
+ "## 5. Document Loading & Chunking"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "4d2684a9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loaded 3 documents\n",
+ "Document types: ['learning', 'personal', 'projects']\n",
+ "\n",
+ "Total chunks: 10\n",
+ "Average chunk size: 460 characters\n",
+ "Chunks per doc type: {'projects': 4, 'learning': 3, 'personal': 3}\n"
+ ]
+ }
+ ],
+ "source": [
+ "def load_documents() -> list[dict]:\n",
+ " \"\"\"\n",
+ " Walk each subfolder of the knowledge base and load all .md files.\n",
+ " Returns list of {text, doc_type, source}.\n",
+ " \"\"\"\n",
+ " documents = []\n",
+ " for folder in sorted(KNOWLEDGE_BASE_PATH.iterdir()):\n",
+ " if not folder.is_dir():\n",
+ " continue\n",
+ " doc_type = folder.name\n",
+ " for file in sorted(folder.rglob(\"*.md\")):\n",
+ " text = file.read_text(encoding=\"utf-8\", errors=\"ignore\")\n",
+ " documents.append({\n",
+ " \"text\": text,\n",
+ " \"doc_type\": doc_type,\n",
+ " \"source\": file.name,\n",
+ " })\n",
+ " print(f\"Loaded {len(documents)} documents\")\n",
+ " print(f\"Document types: {sorted(set(d['doc_type'] for d in documents))}\")\n",
+ " return documents\n",
+ "\n",
+ "\n",
+ "def chunk_document(doc: dict,\n",
+ " chunk_size: int = CHUNK_SIZE,\n",
+ " overlap: int = CHUNK_OVERLAP) -> list[dict]:\n",
+ " \"\"\"\n",
+ " Split a document into overlapping character-level chunks.\n",
+ " Each chunk inherits the document's metadata.\n",
+ " \"\"\"\n",
+ " text = doc[\"text\"]\n",
+ " chunks = []\n",
+ " start = 0\n",
+ " while start < len(text):\n",
+ " end = min(start + chunk_size, len(text))\n",
+ " chunks.append({\n",
+ " \"content\": text[start:end],\n",
+ " \"doc_type\": doc[\"doc_type\"],\n",
+ " \"source\": doc[\"source\"],\n",
+ " })\n",
+ " if end == len(text):\n",
+ " break\n",
+ " start += chunk_size - overlap\n",
+ " return chunks\n",
+ "\n",
+ "\n",
+ "# Load and chunk\n",
+ "documents = load_documents()\n",
+ "\n",
+ "all_chunks: list[dict] = []\n",
+ "for doc in documents:\n",
+ " all_chunks.extend(chunk_document(doc))\n",
+ "\n",
+ "print(f\"\\nTotal chunks: {len(all_chunks)}\")\n",
+ "avg = sum(len(c['content']) for c in all_chunks) / len(all_chunks)\n",
+ "print(f\"Average chunk size: {avg:.0f} characters\")\n",
+ "print(f\"Chunks per doc type: { {t: sum(1 for c in all_chunks if c['doc_type']==t) for t in set(c['doc_type'] for c in all_chunks)} }\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03c2bf57",
+ "metadata": {},
+ "source": [
+ "## 6. Embeddings\n",
+ "\n",
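+ "Prompts Claude to return a 128-dimensional vector for each chunk as a JSON array (input is reduced to printable ASCII and truncated to 800 characters). The reply is parsed defensively, failed calls are retried (up to 3 attempts by default), and the vector is padded or truncated to 128 values and L2-normalised. These are LLM-generated approximations rather than the output of a dedicated embedding model — swap in one (e.g. voyage-3 or text-embedding-3-small) for production use."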
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a0c76d4b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Embedding functions defined\n"
+ ]
+ }
+ ],
+ "source": [
+ "def _parse_embedding(raw: str) -> list[float]:\n",
+ " \"\"\"Robustly extract a float array from Claude's response.\"\"\"\n",
+ " raw = re.sub(r\"```[a-z]*\", \"\", raw).strip().strip(\"`\").strip()\n",
+ "\n",
+ " try:\n",
+ " vec = json.loads(raw)\n",
+ " if isinstance(vec, list):\n",
+ " return [float(x) for x in vec]\n",
+ " except Exception:\n",
+ " pass\n",
+ "\n",
+ " m = re.search(r\"(\\[.*?\\])\", raw, re.DOTALL)\n",
+ " if m:\n",
+ " candidate = re.sub(r\",\\s*]\", \"]\", m.group(1))\n",
+ " try:\n",
+ " vec = json.loads(candidate)\n",
+ " if isinstance(vec, list):\n",
+ " return [float(x) for x in vec]\n",
+ " except Exception:\n",
+ " pass\n",
+ "\n",
+ " nums = re.findall(r\"-?\\d+\\.\\d+(?:[eE][+-]?\\d+)?|-?\\d+\", raw)\n",
+ " if nums:\n",
+ " return [float(n) for n in nums]\n",
+ "\n",
+ " raise ValueError(f\"Cannot parse embedding from:\\n{raw[:300]}\")\n",
+ "\n",
+ "\n",
+ "def embed_texts(texts: list[str], max_retries: int = 3) -> list[list[float]]:\n",
+ " \"\"\"\n",
+ " Generate 128-dim semantic embeddings via Claude.\n",
+ " Includes robust JSON parsing and retry logic.\n",
+ " Swap for voyage-3 / text-embedding-3-small in production.\n",
+ " \"\"\"\n",
+ " client = get_client()\n",
+ " embeddings = []\n",
+ "\n",
+ " for i, text in enumerate(texts):\n",
+ " safe = re.sub(r\"[^\\x20-\\x7E]\", \" \", text).strip()[:800]\n",
+ " prompt = (\n",
+ " \"Return a JSON array of exactly 128 floats (values between -1 and 1) \"\n",
+ " \"representing the semantic embedding of the text below.\\n\"\n",
+ " \"Rules: output ONLY the JSON array starting with [ and ending with ]. \"\n",
+ " \"No prose, no markdown. Use at most 6 decimal places. Do NOT truncate.\\n\\n\"\n",
+ " f\"TEXT:\\n{safe}\"\n",
+ " )\n",
+ " vec = None\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " resp = client.messages.create(\n",
+ " model=ANTHROPIC_MODEL,\n",
+ " max_tokens=1200,\n",
+ " messages=[{\"role\": \"user\", \"content\": prompt}]\n",
+ " )\n",
+ " vec = _parse_embedding(resp.content[0].text.strip())\n",
+ " if len(vec) < 128:\n",
+ " vec += [0.0] * (128 - len(vec))\n",
+ " vec = vec[:128]\n",
+ " break\n",
+ " except Exception as e:\n",
+ " print(f\" [embed {i}] attempt {attempt+1} failed: {e}\")\n",
+ "\n",
+ " if vec is None:\n",
+ " raise RuntimeError(f\"Embedding failed for chunk {i} after {max_retries} attempts\")\n",
+ "\n",
+ " norm = sum(x**2 for x in vec) ** 0.5 or 1.0\n",
+ " embeddings.append([x / norm for x in vec])\n",
+ "\n",
+ " return embeddings\n",
+ "\n",
+ "\n",
+ "print(\"✅ Embedding functions defined\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3cc77897",
+ "metadata": {},
+ "source": [
+ "## 7. Build the Vector Store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "54ca1938",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Embedding 10 chunks (this may take a minute)...\n",
+ "\n",
+ "✅ Vector store ready — 10 chunks stored\n",
+ " Embedding dimensions: 128\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"Embedding {len(all_chunks)} chunks (this may take a minute)...\")\n",
+ "\n",
+ "_chroma = chromadb.Client() \n",
+ "col_name = \"personal_rag\"\n",
+ "if col_name in [c.name for c in _chroma.list_collections()]:\n",
+ " _chroma.delete_collection(col_name)\n",
+ "_collection = _chroma.get_or_create_collection(\n",
+ " col_name, metadata={\"hnsw:space\": \"cosine\"}\n",
+ ")\n",
+ "\n",
+ "vectors = embed_texts([c[\"content\"] for c in all_chunks])\n",
+ "\n",
+ "_collection.add(\n",
+ " ids = [str(uuid.uuid4()) for _ in all_chunks],\n",
+ " embeddings= vectors,\n",
+ " documents = [c[\"content\"] for c in all_chunks],\n",
+ " metadatas = [{\"doc_type\": c[\"doc_type\"], \"source\": c[\"source\"]} for c in all_chunks],\n",
+ ")\n",
+ "\n",
+ "print(f\"\\n✅ Vector store ready — {_collection.count():,} chunks stored\")\n",
+ "print(f\" Embedding dimensions: 128\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "23eefcfa",
+ "metadata": {},
+ "source": [
+ "## 8. t-SNE Visualisation (2D & 3D)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a54bd7f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f38ffcd8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running t-SNE on 10 vectors (perplexity=5.0)...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "hoverinfo": "text",
+ "marker": {
+ "color": [
+ "#4dd0e1",
+ "#4dd0e1",
+ "#4dd0e1",
+ "#7c6af7",
+ "#7c6af7",
+ "#7c6af7",
+ "#f06292",
+ "#f06292",
+ "#f06292",
+ "#f06292"
+ ],
+ "line": {
+ "color": "white",
+ "width": 0.5
+ },
+ "opacity": 0.85,
+ "size": 7
+ },
+ "mode": "markers",
+ "text": [
+ "Type: learning<br># Learning Journey\n\n## Currently Learning (2025)\n- Advanced RAG techniques: reranking, hybrid search, query decomposition\n- Anthropic's Claude API: to...",
+ "Type: learning<br>4\n- Fullstack Open — University of Helsinki — 2022\n- AWS Solutions Architect Associate — 2023\n- Fast.ai Practical Deep Learning — 2024\n\n## Books Read\n...",
+ "Type: learning<br>- AWS Certified Solutions Architect — Associate (2023)\n- Google Professional Data Engineer (2024)\n- Certified Kubernetes Application Developer — CKAD ...",
+ "Type: personal<br># Personal Profile\n\n## About Me\nName: Alex Johnson\nRole: Software Engineer & AI Enthusiast\nLocation: Tech Hub City\n\n## Background\nI am a passionate so...",
+ "Type: personal<br>since worked at two startups.\n\n## Skills\n- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n- Frameworks: React, FastAPI, LangChain, Gr...",
+ "Type: personal<br>ource projects, and mentoring aspiring developers.\nIn my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\nI run a sm...",
+ "Type: projects<br># Projects Portfolio\n\n## AI-Powered Document Assistant\nA RAG-based system that helps users query large document collections efficiently.\nTech Stack: P...",
+ "Type: projects<br>erlapping chunks outperform large ones.\n\n## Real-time Analytics Dashboard\nBuilt a scalable dashboard for visualising business metrics in real-time.\nTe...",
+ "Type: projects<br>n AI assistant that provides automated code reviews and suggestions.\nTech Stack: Python, GitHub API, Claude API\nFeatures: Pattern detection, best prac...",
+ "Type: projects<br>PostgreSQL, ML forecasting\nFeatures: Receipt scanning via OCR, budget alerts, spending trend analysis\n\n## E-commerce Recommendation Engine\nBuilt a co..."
+ ],
+ "type": "scatter",
+ "x": {
+ "bdata": "VFF9winPucKlebdB6VmIwrQdVUIaBgVC0CFxwgBTHML2SUlCqfk8QQ==",
+ "dtype": "f4"
+ },
+ "y": {
+ "bdata": "2bg+wq+WxsGq+ITB042bQgc66ELOwhZDIH/9Ql7QTEJY1SFBXGejQg==",
+ "dtype": "f4"
+ }
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "paper_bgcolor": "#0f0f17",
+ "plot_bgcolor": "#1a1a28",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#f2f5fa"
+ },
+ "error_y": {
+ "color": "#f2f5fa"
+ },
+ "marker": {
+ "line": {
+ "color": "rgb(17,17,17)",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "rgb(17,17,17)",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#A2B1C6",
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "minorgridcolor": "#506784",
+ "startlinecolor": "#A2B1C6"
+ },
+ "baxis": {
+ "endlinecolor": "#A2B1C6",
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "minorgridcolor": "#506784",
+ "startlinecolor": "#A2B1C6"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "line": {
+ "color": "#283442"
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "line": {
+ "color": "#283442"
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermap": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermap"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#506784"
+ },
+ "line": {
+ "color": "rgb(17,17,17)"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#2a3f5f"
+ },
+ "line": {
+ "color": "rgb(17,17,17)"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#f2f5fa",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#f2f5fa"
+ },
+ "geo": {
+ "bgcolor": "rgb(17,17,17)",
+ "lakecolor": "rgb(17,17,17)",
+ "landcolor": "rgb(17,17,17)",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "#506784"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "dark"
+ },
+ "paper_bgcolor": "rgb(17,17,17)",
+ "plot_bgcolor": "rgb(17,17,17)",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ },
+ "bgcolor": "rgb(17,17,17)",
+ "radialaxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "rgb(17,17,17)",
+ "gridcolor": "#506784",
+ "gridwidth": 2,
+ "linecolor": "#506784",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "#C8D4E3"
+ },
+ "yaxis": {
+ "backgroundcolor": "rgb(17,17,17)",
+ "gridcolor": "#506784",
+ "gridwidth": 2,
+ "linecolor": "#506784",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "#C8D4E3"
+ },
+ "zaxis": {
+ "backgroundcolor": "rgb(17,17,17)",
+ "gridcolor": "#506784",
+ "gridwidth": 2,
+ "linecolor": "#506784",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "#C8D4E3"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#f2f5fa"
+ }
+ },
+ "sliderdefaults": {
+ "bgcolor": "#C8D4E3",
+ "bordercolor": "rgb(17,17,17)",
+ "borderwidth": 1,
+ "tickwidth": 0
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ },
+ "bgcolor": "rgb(17,17,17)",
+ "caxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "updatemenudefaults": {
+ "bgcolor": "#506784",
+ "borderwidth": 0
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "#283442",
+ "linecolor": "#506784",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "#283442",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "#283442",
+ "linecolor": "#506784",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "#283442",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "2D Vector Store Visualisation (t-SNE)"
+ },
+ "width": 900,
+ "xaxis": {
+ "title": {
+ "text": "t-SNE Dimension 1"
+ }
+ },
+ "yaxis": {
+ "title": {
+ "text": "t-SNE Dimension 2"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "hoverinfo": "text",
+ "marker": {
+ "color": [
+ "#4dd0e1",
+ "#4dd0e1",
+ "#4dd0e1",
+ "#7c6af7",
+ "#7c6af7",
+ "#7c6af7",
+ "#f06292",
+ "#f06292",
+ "#f06292",
+ "#f06292"
+ ],
+ "opacity": 0.8,
+ "size": 5
+ },
+ "mode": "markers",
+ "text": [
+ "Type: learning<br># Learning Journey\n\n## Currently Learning (2025)\n- Advanced RAG techniques: reranking, hybrid search, query decomposition\n- Anthropic's Claude API: to...",
+ "Type: learning<br>4\n- Fullstack Open — University of Helsinki — 2022\n- AWS Solutions Architect Associate — 2023\n- Fast.ai Practical Deep Learning — 2024\n\n## Books Read\n...",
+ "Type: learning<br>- AWS Certified Solutions Architect — Associate (2023)\n- Google Professional Data Engineer (2024)\n- Certified Kubernetes Application Developer — CKAD ...",
+ "Type: personal<br># Personal Profile\n\n## About Me\nName: Alex Johnson\nRole: Software Engineer & AI Enthusiast\nLocation: Tech Hub City\n\n## Background\nI am a passionate so...",
+ "Type: personal<br>since worked at two startups.\n\n## Skills\n- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n- Frameworks: React, FastAPI, LangChain, Gr...",
+ "Type: personal<br>ource projects, and mentoring aspiring developers.\nIn my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\nI run a sm...",
+ "Type: projects<br># Projects Portfolio\n\n## AI-Powered Document Assistant\nA RAG-based system that helps users query large document collections efficiently.\nTech Stack: P...",
+ "Type: projects<br>erlapping chunks outperform large ones.\n\n## Real-time Analytics Dashboard\nBuilt a scalable dashboard for visualising business metrics in real-time.\nTe...",
+ "Type: projects<br>n AI assistant that provides automated code reviews and suggestions.\nTech Stack: Python, GitHub API, Claude API\nFeatures: Pattern detection, best prac...",
+ "Type: projects<br>PostgreSQL, ML forecasting\nFeatures: Receipt scanning via OCR, budget alerts, spending trend analysis\n\n## E-commerce Recommendation Engine\nBuilt a co..."
+ ],
+ "type": "scatter3d",
+ "x": {
+ "bdata": "Rt5bwi6NBkKfSnNC1gRiQZH7xUJLJ/g/L/fLQsdQfsKaOA/CTQJxwg==",
+ "dtype": "f4"
+ },
+ "y": {
+ "bdata": "gfYWQv0Ja0KJfMDC0FO8QuoFAcNkdd3CpyMGQumGHkLlfdTCrrcvQw==",
+ "dtype": "f4"
+ },
+ "z": {
+ "bdata": "FVmLwh/Vy8KQFcXCeCOIQhtWm0J6AqpCp5UXQlJCvUKRYMLCXcrmwg==",
+ "dtype": "f4"
+ }
+ }
+ ],
+ "layout": {
+ "height": 750,
+ "paper_bgcolor": "#0f0f17",
+ "scene": {
+ "bgcolor": "#1a1a28",
+ "xaxis": {
+ "title": {
+ "text": "Dim 1"
+ }
+ },
+ "yaxis": {
+ "title": {
+ "text": "Dim 2"
+ }
+ },
+ "zaxis": {
+ "title": {
+ "text": "Dim 3"
+ }
+ }
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermap": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermap"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "3D Vector Store Visualisation (t-SNE)"
+ },
+ "width": 1000
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Visualisations complete\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "result = _collection.get(include=[\"embeddings\", \"documents\", \"metadatas\"])\n",
+ "vectors_np = np.array(result[\"embeddings\"])\n",
+ "doc_types = [m[\"doc_type\"] for m in result[\"metadatas\"]]\n",
+ "docs_text = result[\"documents\"]\n",
+ "\n",
+ "COLOR_MAP = {\"personal\": \"#7c6af7\", \"projects\": \"#f06292\", \"learning\": \"#4dd0e1\"}\n",
+ "colors = [COLOR_MAP.get(t, \"#aaa\") for t in doc_types]\n",
+ "\n",
+ "n = vectors_np.shape[0]\n",
+ "perplexity = max(5.0, min(30.0, (n - 1) / 3.0))\n",
+ "print(f\"Running t-SNE on {n} vectors (perplexity={perplexity:.1f})...\")\n",
+ "\n",
+ "\n",
+ "tsne_2d = TSNE(n_components=2, random_state=42, perplexity=perplexity, max_iter=1000)\n",
+ "rv_2d = tsne_2d.fit_transform(vectors_np)\n",
+ "\n",
+ "fig_2d = go.Figure(data=[go.Scatter(\n",
+ " x=rv_2d[:, 0], y=rv_2d[:, 1],\n",
+ " mode=\"markers\",\n",
+ " marker=dict(size=7, color=colors, opacity=0.85,\n",
+ " line=dict(width=0.5, color=\"white\")),\n",
+ " text=[f\"Type: {t}<br>{d[:150]}...\" for t, d in zip(doc_types, docs_text)],\n",
+ " hoverinfo=\"text\"\n",
+ ")])\n",
+ "fig_2d.update_layout(\n",
+ " title=\"2D Vector Store Visualisation (t-SNE)\",\n",
+ " xaxis_title=\"t-SNE Dimension 1\",\n",
+ " yaxis_title=\"t-SNE Dimension 2\",\n",
+ " width=900, height=600,\n",
+ " template=\"plotly_dark\",\n",
+ " paper_bgcolor=\"#0f0f17\",\n",
+ " plot_bgcolor=\"#1a1a28\",\n",
+ ")\n",
+ "fig_2d.show()\n",
+ "\n",
+ "\n",
+ "tsne_3d = TSNE(n_components=3, random_state=42, perplexity=perplexity, max_iter=1000)\n",
+ "rv_3d = tsne_3d.fit_transform(vectors_np)\n",
+ "\n",
+ "fig_3d = go.Figure(data=[go.Scatter3d(\n",
+ " x=rv_3d[:, 0], y=rv_3d[:, 1], z=rv_3d[:, 2],\n",
+ " mode=\"markers\",\n",
+ " marker=dict(size=5, color=colors, opacity=0.8),\n",
+ " text=[f\"Type: {t}<br>{d[:150]}...\" for t, d in zip(doc_types, docs_text)],\n",
+ " hoverinfo=\"text\"\n",
+ ")])\n",
+ "fig_3d.update_layout(\n",
+ " title=\"3D Vector Store Visualisation (t-SNE)\",\n",
+ " scene=dict(\n",
+ " xaxis_title=\"Dim 1\", yaxis_title=\"Dim 2\", zaxis_title=\"Dim 3\",\n",
+ " bgcolor=\"#1a1a28\"\n",
+ " ),\n",
+ " width=1000, height=750,\n",
+ " paper_bgcolor=\"#0f0f17\",\n",
+ ")\n",
+ "fig_3d.show()\n",
+ "\n",
+ "print(\"✅ Visualisations complete\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e86e4b35",
+ "metadata": {},
+ "source": [
+ "## 9. RAG Chain\n",
+ "\n",
+ "Retrieval + context assembly + generation with conversation memory — all via Claude."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "6805eec2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting nbformat\n",
+ " Using cached nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)\n",
+ "Collecting fastjsonschema>=2.15 (from nbformat)\n",
+ " Using cached fastjsonschema-2.21.2-py3-none-any.whl.metadata (2.3 kB)\n",
+ "Requirement already satisfied: jsonschema>=2.6 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (4.26.0)\n",
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (5.9.1)\n",
+ "Requirement already satisfied: traitlets>=5.1 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (5.14.3)\n",
+ "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (25.4.0)\n",
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (2025.9.1)\n",
+ "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (0.37.0)\n",
+ "Requirement already satisfied: rpds-py>=0.25.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (0.30.0)\n",
+ "Requirement already satisfied: platformdirs>=2.5 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat) (4.9.2)\n",
+ "Requirement already satisfied: typing-extensions>=4.4.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from referencing>=0.28.4->jsonschema>=2.6->nbformat) (4.15.0)\n",
+ "Using cached nbformat-5.10.4-py3-none-any.whl (78 kB)\n",
+ "Using cached fastjsonschema-2.21.2-py3-none-any.whl (24 kB)\n",
+ "Installing collected packages: fastjsonschema, nbformat\n",
+ "Successfully installed fastjsonschema-2.21.2 nbformat-5.10.4\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "[notice] A new release of pip is available: 24.0 -> 26.0.1\n",
+ "[notice] To update, run: c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Scripts\\python.exe -m pip install --upgrade pip\n"
+ ]
+ }
+ ],
+ "source": [
+ "pip install nbformat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "24877f38",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RAG chain ready. Running smoke test...\n",
+ "\n",
+ "Q: What is the person's background?\n",
+ "A: Based on the profile document, here's the person's background:\n",
+ "\n",
+ "**Professional Background:**\n",
+ "- Currently works as a Senior Software Engineer at TechCorp\n",
+ "- Previously worked at two startups before joining TechCorp\n",
+ "\n",
+ "**Technical Skills:**\n",
+ "- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n",
+ "- Frameworks: React, FastAPI, LangChain, Gradio\n",
+ "- AI/ML: LLMs, RAG systems, Vector Databases, Prompt Engineering, Fine-tuning\n",
+ "- Databases: PostgreSQL, MongoDB, Chroma, Pinecone, Redis\n",
+ "- Cloud: AWS, GCP, Docker, Kubernetes\n",
+ "\n",
+ "**Certifications:**\n",
+ "- AWS Certified Solutions Architect — Associate (2023)\n",
+ "- Google Professional Data Engineer (2024)\n",
+ "- Certified Kubernetes Application Developer — CKAD (2023)\n",
+ "\n",
+ "**Personal Interests:**\n",
+ "- Exploring new technologies\n",
+ "- Contributing to open-source projects\n",
+ "- Mentoring aspiring developers\n",
+ "- Hiking and reading tech blogs\n",
+ "- Experimenting with new AI tools\n",
+ "- Runs a small tech blog with 2,000 monthly readers\n",
+ "\n",
+ "**Contact Information:**\n",
+ "- GitHub: github.com/alexj\n",
+ "- LinkedIn: linkedin.com/in/alexjohnson\n",
+ "- Email: alex@techmail.com\n",
+ "\n",
+ "*Source: personal/profile.md and learning/journey.md*\n",
+ "\n",
+ "_Sources: learning, personal, projects_\n"
+ ]
+ }
+ ],
+ "source": [
+ "SYSTEM_PROMPT = textwrap.dedent(\"\"\"\n",
+ " You are a helpful personal knowledge assistant.\n",
+ " Answer questions accurately and concisely using ONLY the context provided.\n",
+ " If the answer is not in the context, say so honestly.\n",
+ " Always cite which source document your answer comes from.\n",
+ "\n",
+ " CONTEXT:\n",
+ " {context}\n",
+ "\"\"\")\n",
+ "\n",
+ "\n",
+ "def retrieve_chunks(query: str, k: int = TOP_K_RESULTS) -> list[dict]:\n",
+ " \"\"\"Embed the query and fetch the top-k chunks from ChromaDB.\"\"\"\n",
+ " q_vec = embed_texts([query])[0]\n",
+ " results = _collection.query(\n",
+ " query_embeddings=[q_vec],\n",
+ " n_results=min(k, _collection.count())\n",
+ " )\n",
+ " return [\n",
+ " {\"content\": doc, \"doc_type\": meta[\"doc_type\"], \"source\": meta[\"source\"]}\n",
+ " for doc, meta in zip(results[\"documents\"][0], results[\"metadatas\"][0])\n",
+ " ]\n",
+ "\n",
+ "\n",
+ "def chat(question: str, history: list) -> str:\n",
+ " \"\"\"\n",
+ " Full RAG pipeline:\n",
+ " 1. Retrieve relevant chunks\n",
+ " 2. Build context\n",
+ " 3. Generate answer with Claude (using full conversation history)\n",
+ " \"\"\"\n",
+ " global _chat_history\n",
+ "\n",
+ " if _collection is None:\n",
+ " return \"⚠️ Vector store not built yet — run the cells above first.\"\n",
+ "\n",
+ " try:\n",
+ " client = get_client()\n",
+ "\n",
+ " \n",
+ " chunks = retrieve_chunks(question)\n",
+ "\n",
+ " \n",
+ " context = \"\\n\\n---\\n\\n\".join(\n",
+ " f\"[{c['doc_type']} / {c['source']}]\\n{c['content']}\"\n",
+ " for c in chunks\n",
+ " )\n",
+ " sources = sorted(set(c[\"doc_type\"] for c in chunks))\n",
+ "\n",
+ " \n",
+ " response = client.messages.create(\n",
+ " model=ANTHROPIC_MODEL,\n",
+ " max_tokens=1024,\n",
+ " system=SYSTEM_PROMPT.format(context=context),\n",
+ " messages=_chat_history + [{\"role\": \"user\", \"content\": question}]\n",
+ " )\n",
+ " answer = response.content[0].text.strip()\n",
+ "\n",
+ " \n",
+ " _chat_history.append({\"role\": \"user\", \"content\": question})\n",
+ " _chat_history.append({\"role\": \"assistant\", \"content\": answer})\n",
+ "\n",
+ " return answer + f\"\\n\\n_Sources: {', '.join(sources)}_\"\n",
+ "\n",
+ " except Exception as e:\n",
+ " return f\"❌ Error: {e}\"\n",
+ "\n",
+ "\n",
+ "print(\"RAG chain ready. Running smoke test...\\n\")\n",
+ "_chat_history = []\n",
+ "test_q = \"What is the person's background?\"\n",
+ "print(f\"Q: {test_q}\")\n",
+ "print(f\"A: {chat(test_q, [])}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24e0e4f7",
+ "metadata": {},
+ "source": [
+ "## 10. Gradio Chat Interface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48799a64",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Gradio interface configured\n"
+ ]
+ }
+ ],
+ "source": [
+ "def create_gradio_interface():\n",
+ " \"\"\"Build and return the Gradio Blocks UI.\"\"\"\n",
+ "\n",
+ " THEME = gr.themes.Base(\n",
+ " primary_hue=\"violet\",\n",
+ " secondary_hue=\"purple\",\n",
+ " neutral_hue=\"slate\",\n",
+ " font=[gr.themes.GoogleFont(\"Inter\"), \"ui-sans-serif\", \"sans-serif\"],\n",
+ " ).set(\n",
+ " body_background_fill=\"#0f0f17\",\n",
+ " body_text_color=\"#e2e0f0\",\n",
+ " block_background_fill=\"#1a1a28\",\n",
+ " block_border_color=\"#2d2d45\",\n",
+ " block_title_text_color=\"#c4b5fd\",\n",
+ " input_background_fill=\"#12121e\",\n",
+ " button_primary_background_fill=\"linear-gradient(135deg, #7c3aed, #a855f7)\",\n",
+ " button_primary_text_color=\"#fff\",\n",
+ " )\n",
+ "\n",
+ " with gr.Blocks(theme=THEME) as ui:\n",
+ "\n",
+ " gr.HTML(\"\"\"\n",
+ "
\n", + " Powered by Claude · RAG over your personal knowledge base\n", + "
\n", + "