From 349c610450b43cef075644df3d894e8c0fba68fc Mon Sep 17 00:00:00 2001 From: Jonas Thamane <166150947+NathiJonas@users.noreply.github.com> Date: Sun, 8 Mar 2026 23:54:11 +0200 Subject: [PATCH 1/2] Create Jonas Thamane Week 5 PR.ipynb --- .../Jonas Thamane Week 5 PR.ipynb | 2764 +++++++++++++++++ 1 file changed, 2764 insertions(+) create mode 100644 community-contributions/Jonas Thamane Week 5 PR.ipynb diff --git a/community-contributions/Jonas Thamane Week 5 PR.ipynb b/community-contributions/Jonas Thamane Week 5 PR.ipynb new file mode 100644 index 0000000000..de4141564d --- /dev/null +++ b/community-contributions/Jonas Thamane Week 5 PR.ipynb @@ -0,0 +1,2764 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "169f472e", + "metadata": {}, + "source": [ + "# Week 5 Exercise — Personal Information Processor with RAG\n", + "\n", + "A complete RAG pipeline rebuilt with **Anthropic Claude API** \n", + "\n", + "### Features\n", + "- Document loading from multiple folders (Markdown files)\n", + "- Intelligent text chunking with overlap\n", + "- Vector embeddings via Claude\n", + "- ChromaDB vector store for efficient retrieval\n", + "- t-SNE visualization (2D and 3D)\n", + "- Conversational RAG with memory\n", + "- Gradio chat interface\n", + "- Source attribution in answers" + ] + }, + { + "cell_type": "markdown", + "id": "9df19ead", + "metadata": {}, + "source": [ + "## 1. Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ee0830c0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 24.0 -> 26.0.1\n", + "[notice] To update, run: c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Scripts\\python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install -q gradio anthropic chromadb python-dotenv numpy plotly scikit-learn" + ] + }, + { + "cell_type": "markdown", + "id": "b00f5a61", + "metadata": {}, + "source": [ + "## 2. Setup and Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd5a77b7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Anthropic API Key found: sk-ant-api03-me...\n", + "✅ Configuration ready\n" + ] + } + ], + "source": [ + "import os\n", + "import re\n", + "import uuid\n", + "import json\n", + "import glob\n", + "import textwrap\n", + "from pathlib import Path\n", + "from typing import Optional\n", + "\n", + "import numpy as np\n", + "import plotly.graph_objects as go\n", + "from sklearn.manifold import TSNE\n", + "import gradio as gr\n", + "import anthropic\n", + "import chromadb\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "\n", + "ANTHROPIC_MODEL = \"claude-sonnet-4-20250514\"\n", + "DB_NAME = \"personal_knowledge_db\"\n", + "CHUNK_SIZE = 500 # characters per chunk\n", + "CHUNK_OVERLAP = 100 # character overlap between chunks\n", + "TOP_K_RESULTS = 5 # chunks to retrieve per query\n", + "KNOWLEDGE_BASE_PATH = Path(\"knowledge_base\")\n", + "\n", + "api_key = os.getenv(\"ANTHROPIC_API_KEY\", \"\")\n", + "if api_key:\n", + " print(f\"Anthropic API Key found: {api_key[:15]}...\")\n", + "else:\n", + " print(\"⚠️ ANTHROPIC_API_KEY not set — add it to a .env file or set os.environ directly\")\n", + "\n", + "print(\"✅ Configuration ready\")" + ] + }, + { + "cell_type": "markdown", + "id": "4c2bf47d", + "metadata": {}, + "source": [ + "## 3. Global State" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "636ae5d6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Global state initialised\n" + ] + } + ], + "source": [ + "_client: Optional[anthropic.Anthropic] = None\n", + "_chroma: Optional[chromadb.Client] = None\n", + "_collection: Optional[chromadb.Collection] = None\n", + "_chat_history: list[dict] = [] \n", + "\n", + "\n", + "def get_client() -> anthropic.Anthropic:\n", + " global _client\n", + " if _client is None:\n", + " key = os.getenv(\"ANTHROPIC_API_KEY\", \"\")\n", + " if not key:\n", + " raise ValueError(\"ANTHROPIC_API_KEY not set\")\n", + " _client = anthropic.Anthropic(api_key=key)\n", + " return _client\n", + "\n", + "\n", + "print(\"✅ Global state initialised\")" + ] + }, + { + "cell_type": "markdown", + "id": "211bf388", + "metadata": {}, + "source": [ + "## 4. Sample Knowledge Base" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "ea9c4f6f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Exists: knowledge_base\\personal\\profile.md\n", + " Exists: knowledge_base\\projects\\portfolio.md\n", + " Exists: knowledge_base\\learning\\journey.md\n", + "\n", + "✅ Knowledge base ready at: C:\\Users\\Lenovo\\Downloads\\knowledge_base\n" + ] + } + ], + "source": [ + "def create_sample_knowledge_base():\n", + " \"\"\"Create sample knowledge base with personal, projects, and learning data.\"\"\"\n", + "\n", + " for folder in [\"personal\", \"projects\", \"learning\"]:\n", + " (KNOWLEDGE_BASE_PATH / folder).mkdir(parents=True, exist_ok=True)\n", + "\n", + " files = {\n", + " \"personal/profile.md\": \"\"\"\n", + "# Personal Profile\n", + "\n", + "## About Me\n", + "Name: Alex Johnson\n", + "Role: Software Engineer & AI Enthusiast\n", + "Location: Tech Hub City\n", + "\n", + "## Background\n", + "I am a passionate software engineer with over 5 years of experience building scalable applications.\n", + "My journey started with web development and has evolved into specialising in AI and machine learning.\n", + "I completed my Computer Science degree at State University and have since worked at two startups.\n", + "\n", + "## Skills\n", + "- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n", + "- Frameworks: React, FastAPI, LangChain, Gradio\n", + "- AI/ML: LLMs, RAG systems, Vector Databases, Prompt Engineering, Fine-tuning\n", + "- Databases: PostgreSQL, MongoDB, Chroma, Pinecone, Redis\n", + "- Cloud: AWS, GCP, Docker, Kubernetes\n", + "\n", + "## Interests\n", + "I love exploring new technologies, contributing to open-source projects, and mentoring aspiring developers.\n", + "In my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\n", + "I run a small tech blog with 2,000 monthly readers.\n", + "\n", + "## Contact\n", + "GitHub: github.com/alexj\n", + "LinkedIn: linkedin.com/in/alexjohnson\n", + "Email: alex@techmail.com\n", + "\"\"\",\n", + "\n", + " \"projects/portfolio.md\": \"\"\"\n", + "# Projects Portfolio\n", + "\n", + "## AI-Powered Document Assistant\n", + "A RAG-based system that helps users query large document collections efficiently.\n", + "Tech Stack: Python, LangChain, Chroma, Anthropic Claude API\n", + "Status: Shipped — used by 300+ beta users\n", + "Key Features: Semantic search, multi-document support, conversation history, source attribution\n", + "Lessons Learned: Chunking strategy matters enormously; smaller overlapping chunks outperform large ones.\n", + "\n", + "## Real-time Analytics Dashboard\n", + "Built a scalable dashboard for visualising business metrics in real-time.\n", + "Tech Stack: React, Node.js, PostgreSQL, Redis, WebSockets\n", + "Impact: Reduced reporting time by 80% for the operations team.\n", + "Challenges: Handling 10,000 concurrent WebSocket connections required careful connection pooling.\n", + "\n", + "## Code Review Automation Tool\n", + "An AI assistant that provides automated code reviews and suggestions.\n", + "Tech Stack: Python, GitHub API, Claude API\n", + "Features: Pattern detection, best practices recommendations, security vulnerability scanning\n", + "Status: Open-sourced on GitHub with 450 stars\n", + "\n", + "## Personal Finance Tracker\n", + "Full-stack application for tracking expenses and predicting future spending patterns.\n", + "Tech Stack: React Native, FastAPI, PostgreSQL, ML forecasting\n", + "Features: Receipt scanning via OCR, budget alerts, spending trend analysis\n", + "\n", + "## E-commerce Recommendation Engine\n", + "Built a collaborative filtering engine for a mid-size online retailer.\n", + "Tech Stack: Python, PyTorch, FastAPI, Redis\n", + "Impact: 23% increase in average order value after deployment.\n", + "\"\"\",\n", + "\n", + " \"learning/journey.md\": \"\"\"\n", + "# Learning Journey\n", + "\n", + "## Currently Learning (2025)\n", + "- Advanced RAG techniques: reranking, hybrid search, query decomposition\n", + "- Anthropic's Claude API: tool use, streaming, multi-turn conversations\n", + "- Rust programming language: ownership model, async Tokio runtime\n", + "- System design: distributed systems, consensus algorithms\n", + "\n", + "## Completed Courses\n", + "- Deep Learning Specialisation — Coursera (Andrew Ng) — 2024\n", + "- Fullstack Open — University of Helsinki — 2022\n", + "- AWS Solutions Architect Associate — 2023\n", + "- Fast.ai Practical Deep Learning — 2024\n", + "\n", + "## Books Read\n", + "- \"Designing Data-Intensive Applications\" — Martin Kleppmann\n", + "- \"The Pragmatic Programmer\" — Hunt & Thomas\n", + "- \"Building Machine Learning Powered Applications\" — Emmanuel Ameisen\n", + "- \"Attention Is All You Need\" — Vaswani et al. (paper)\n", + "\n", + "## Certifications\n", + "- AWS Certified Solutions Architect — Associate (2023)\n", + "- Google Professional Data Engineer (2024)\n", + "- Certified Kubernetes Application Developer — CKAD (2023)\n", + "\n", + "## Learning Goals for 2026\n", + "- Contribute to a major open-source AI project\n", + "- Build and ship a SaaS product from scratch\n", + "- Complete a Rust systems project\n", + "- Publish 3 technical blog posts on RAG architecture\n", + "\n", + "## Study Schedule\n", + "Mornings (6-7am): Reading / papers\n", + "Lunch (12-1pm): Coding exercises\n", + "Evenings (8-9pm): Project work or courses\n", + "\"\"\"\n", + " }\n", + "\n", + " for path, content in files.items():\n", + " full_path = KNOWLEDGE_BASE_PATH / path\n", + " if not full_path.exists():\n", + " full_path.write_text(content.strip(), encoding=\"utf-8\")\n", + " print(f\" Created: {full_path}\")\n", + " else:\n", + " print(f\" Exists: {full_path}\")\n", + "\n", + " print(f\"\\n✅ Knowledge base ready at: {KNOWLEDGE_BASE_PATH.resolve()}\")\n", + "\n", + "\n", + "create_sample_knowledge_base()" + ] + }, + { + "cell_type": "markdown", + "id": "f2de26a5", + "metadata": {}, + "source": [ + "## 5. Document Loading & Chunking" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "4d2684a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 3 documents\n", + "Document types: ['learning', 'personal', 'projects']\n", + "\n", + "Total chunks: 10\n", + "Average chunk size: 460 characters\n", + "Chunks per doc type: {'projects': 4, 'learning': 3, 'personal': 3}\n" + ] + } + ], + "source": [ + "def load_documents() -> list[dict]:\n", + " \"\"\"\n", + " Walk each subfolder of the knowledge base and load all .md files.\n", + " Returns list of {text, doc_type, source}.\n", + " \"\"\"\n", + " documents = []\n", + " for folder in sorted(KNOWLEDGE_BASE_PATH.iterdir()):\n", + " if not folder.is_dir():\n", + " continue\n", + " doc_type = folder.name\n", + " for file in sorted(folder.rglob(\"*.md\")):\n", + " text = file.read_text(encoding=\"utf-8\", errors=\"ignore\")\n", + " documents.append({\n", + " \"text\": text,\n", + " \"doc_type\": doc_type,\n", + " \"source\": file.name,\n", + " })\n", + " print(f\"Loaded {len(documents)} documents\")\n", + " print(f\"Document types: {sorted(set(d['doc_type'] for d in documents))}\")\n", + " return documents\n", + "\n", + "\n", + "def chunk_document(doc: dict,\n", + " chunk_size: int = CHUNK_SIZE,\n", + " overlap: int = CHUNK_OVERLAP) -> list[dict]:\n", + " \"\"\"\n", + " Split a document into overlapping character-level chunks.\n", + " Each chunk inherits the document's metadata.\n", + " \"\"\"\n", + " text = doc[\"text\"]\n", + " chunks = []\n", + " start = 0\n", + " while start < len(text):\n", + " end = min(start + chunk_size, len(text))\n", + " chunks.append({\n", + " \"content\": text[start:end],\n", + " \"doc_type\": doc[\"doc_type\"],\n", + " \"source\": doc[\"source\"],\n", + " })\n", + " if end == len(text):\n", + " break\n", + " start += chunk_size - overlap\n", + " return chunks\n", + "\n", + "\n", + "# Load and chunk\n", + "documents = load_documents()\n", + "\n", + "all_chunks: list[dict] = []\n", + "for doc in documents:\n", + " all_chunks.extend(chunk_document(doc))\n", + "\n", + "print(f\"\\nTotal chunks: {len(all_chunks)}\")\n", + "avg = sum(len(c['content']) for c in all_chunks) / len(all_chunks)\n", + "print(f\"Average chunk size: {avg:.0f} characters\")\n", + "print(f\"Chunks per doc type: { {t: sum(1 for c in all_chunks if c['doc_type']==t) for t in set(c['doc_type'] for c in all_chunks)} }\")" + ] + }, + { + "cell_type": "markdown", + "id": "03c2bf57", + "metadata": {}, + "source": [ + "## 6. Embeddings\n", + "\n", + "Uses Claude to generate 128-dim semantic embeddings with robust JSON parsing and auto-retry." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0c76d4b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Embedding functions defined\n" + ] + } + ], + "source": [ + "def _parse_embedding(raw: str) -> list[float]:\n", + " \"\"\"Robustly extract a float array from Claude's response.\"\"\"\n", + " raw = re.sub(r\"```[a-z]*\", \"\", raw).strip().strip(\"`\").strip()\n", + "\n", + " try:\n", + " vec = json.loads(raw)\n", + " if isinstance(vec, list):\n", + " return [float(x) for x in vec]\n", + " except Exception:\n", + " pass\n", + "\n", + " m = re.search(r\"(\\[.*?\\])\", raw, re.DOTALL)\n", + " if m:\n", + " candidate = re.sub(r\",\\s*]\", \"]\", m.group(1))\n", + " try:\n", + " vec = json.loads(candidate)\n", + " if isinstance(vec, list):\n", + " return [float(x) for x in vec]\n", + " except Exception:\n", + " pass\n", + "\n", + " nums = re.findall(r\"-?\\d+\\.\\d+(?:[eE][+-]?\\d+)?|-?\\d+\", raw)\n", + " if nums:\n", + " return [float(n) for n in nums]\n", + "\n", + " raise ValueError(f\"Cannot parse embedding from:\\n{raw[:300]}\")\n", + "\n", + "\n", + "def embed_texts(texts: list[str], max_retries: int = 3) -> list[list[float]]:\n", + " \"\"\"\n", + " Generate 128-dim semantic embeddings via Claude.\n", + " Includes robust JSON parsing and retry logic.\n", + " Swap for voyage-3 / text-embedding-3-small in production.\n", + " \"\"\"\n", + " client = get_client()\n", + " embeddings = []\n", + "\n", + " for i, text in enumerate(texts):\n", + " safe = re.sub(r\"[^\\x20-\\x7E]\", \" \", text).strip()[:800]\n", + " prompt = (\n", + " \"Return a JSON array of exactly 128 floats (values between -1 and 1) \"\n", + " \"representing the semantic embedding of the text below.\\n\"\n", + " \"Rules: output ONLY the JSON array starting with [ and ending with ]. \"\n", + " \"No prose, no markdown. Use at most 6 decimal places. Do NOT truncate.\\n\\n\"\n", + " f\"TEXT:\\n{safe}\"\n", + " )\n", + " vec = None\n", + " for attempt in range(max_retries):\n", + " try:\n", + " resp = client.messages.create(\n", + " model=ANTHROPIC_MODEL,\n", + " max_tokens=1200,\n", + " messages=[{\"role\": \"user\", \"content\": prompt}]\n", + " )\n", + " vec = _parse_embedding(resp.content[0].text.strip())\n", + " if len(vec) < 128:\n", + " vec += [0.0] * (128 - len(vec))\n", + " vec = vec[:128]\n", + " break\n", + " except Exception as e:\n", + " print(f\" [embed {i}] attempt {attempt+1} failed: {e}\")\n", + "\n", + " if vec is None:\n", + " raise RuntimeError(f\"Embedding failed for chunk {i} after {max_retries} attempts\")\n", + "\n", + " norm = sum(x**2 for x in vec) ** 0.5 or 1.0\n", + " embeddings.append([x / norm for x in vec])\n", + "\n", + " return embeddings\n", + "\n", + "\n", + "print(\"✅ Embedding functions defined\")" + ] + }, + { + "cell_type": "markdown", + "id": "3cc77897", + "metadata": {}, + "source": [ + "## 7. Build the Vector Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54ca1938", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embedding 10 chunks (this may take a minute)...\n", + "\n", + "✅ Vector store ready — 10 chunks stored\n", + " Embedding dimensions: 128\n" + ] + } + ], + "source": [ + "print(f\"Embedding {len(all_chunks)} chunks (this may take a minute)...\")\n", + "\n", + "_chroma = chromadb.Client() \n", + "col_name = \"personal_rag\"\n", + "if col_name in [c.name for c in _chroma.list_collections()]:\n", + " _chroma.delete_collection(col_name)\n", + "_collection = _chroma.get_or_create_collection(\n", + " col_name, metadata={\"hnsw:space\": \"cosine\"}\n", + ")\n", + "\n", + "vectors = embed_texts([c[\"content\"] for c in all_chunks])\n", + "\n", + "_collection.add(\n", + " ids = [str(uuid.uuid4()) for _ in all_chunks],\n", + " embeddings= vectors,\n", + " documents = [c[\"content\"] for c in all_chunks],\n", + " metadatas = [{\"doc_type\": c[\"doc_type\"], \"source\": c[\"source\"]} for c in all_chunks],\n", + ")\n", + "\n", + "print(f\"\\n✅ Vector store ready — {_collection.count():,} chunks stored\")\n", + "print(f\" Embedding dimensions: 128\")" + ] + }, + { + "cell_type": "markdown", + "id": "23eefcfa", + "metadata": {}, + "source": [ + "## 8. t-SNE Visualisation (2D & 3D)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a54bd7f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f38ffcd8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running t-SNE on 10 vectors (perplexity=5.0)...\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hoverinfo": "text", + "marker": { + "color": [ + "#4dd0e1", + "#4dd0e1", + "#4dd0e1", + "#7c6af7", + "#7c6af7", + "#7c6af7", + "#f06292", + "#f06292", + "#f06292", + "#f06292" + ], + "line": { + "color": "white", + "width": 0.5 + }, + "opacity": 0.85, + "size": 7 + }, + "mode": "markers", + "text": [ + "Type: learning
# Learning Journey\n\n## Currently Learning (2025)\n- Advanced RAG techniques: reranking, hybrid search, query decomposition\n- Anthropic's Claude API: to...", + "Type: learning
4\n- Fullstack Open — University of Helsinki — 2022\n- AWS Solutions Architect Associate — 2023\n- Fast.ai Practical Deep Learning — 2024\n\n## Books Read\n...", + "Type: learning
- AWS Certified Solutions Architect — Associate (2023)\n- Google Professional Data Engineer (2024)\n- Certified Kubernetes Application Developer — CKAD ...", + "Type: personal
# Personal Profile\n\n## About Me\nName: Alex Johnson\nRole: Software Engineer & AI Enthusiast\nLocation: Tech Hub City\n\n## Background\nI am a passionate so...", + "Type: personal
since worked at two startups.\n\n## Skills\n- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n- Frameworks: React, FastAPI, LangChain, Gr...", + "Type: personal
ource projects, and mentoring aspiring developers.\nIn my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\nI run a sm...", + "Type: projects
# Projects Portfolio\n\n## AI-Powered Document Assistant\nA RAG-based system that helps users query large document collections efficiently.\nTech Stack: P...", + "Type: projects
erlapping chunks outperform large ones.\n\n## Real-time Analytics Dashboard\nBuilt a scalable dashboard for visualising business metrics in real-time.\nTe...", + "Type: projects
n AI assistant that provides automated code reviews and suggestions.\nTech Stack: Python, GitHub API, Claude API\nFeatures: Pattern detection, best prac...", + "Type: projects
PostgreSQL, ML forecasting\nFeatures: Receipt scanning via OCR, budget alerts, spending trend analysis\n\n## E-commerce Recommendation Engine\nBuilt a co..." + ], + "type": "scatter", + "x": { + "bdata": "VFF9winPucKlebdB6VmIwrQdVUIaBgVC0CFxwgBTHML2SUlCqfk8QQ==", + "dtype": "f4" + }, + "y": { + "bdata": "2bg+wq+WxsGq+ITB042bQgc66ELOwhZDIH/9Ql7QTEJY1SFBXGejQg==", + "dtype": "f4" + } + } + ], + "layout": { + "height": 600, + "paper_bgcolor": "#0f0f17", + "plot_bgcolor": "#1a1a28", + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#f2f5fa" + }, + "error_y": { + "color": "#f2f5fa" + }, + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "baxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#506784" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "header": { + "fill": { + "color": "#2a3f5f" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#f2f5fa", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#f2f5fa" + }, + "geo": { + "bgcolor": "rgb(17,17,17)", + "lakecolor": "rgb(17,17,17)", + "landcolor": "rgb(17,17,17)", + "showlakes": true, + "showland": true, + "subunitcolor": "#506784" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "dark" + }, + "paper_bgcolor": "rgb(17,17,17)", + "plot_bgcolor": "rgb(17,17,17)", + "polar": { + "angularaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "radialaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "yaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "zaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + } + }, + "shapedefaults": { + "line": { + "color": "#f2f5fa" + } + }, + "sliderdefaults": { + "bgcolor": "#C8D4E3", + "bordercolor": "rgb(17,17,17)", + "borderwidth": 1, + "tickwidth": 0 + }, + "ternary": { + "aaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "baxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "caxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "updatemenudefaults": { + "bgcolor": "#506784", + "borderwidth": 0 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "2D Vector Store Visualisation (t-SNE)" + }, + "width": 900, + "xaxis": { + "title": { + "text": "t-SNE Dimension 1" + } + }, + "yaxis": { + "title": { + "text": "t-SNE Dimension 2" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hoverinfo": "text", + "marker": { + "color": [ + "#4dd0e1", + "#4dd0e1", + "#4dd0e1", + "#7c6af7", + "#7c6af7", + "#7c6af7", + "#f06292", + "#f06292", + "#f06292", + "#f06292" + ], + "opacity": 0.8, + "size": 5 + }, + "mode": "markers", + "text": [ + "Type: learning
# Learning Journey\n\n## Currently Learning (2025)\n- Advanced RAG techniques: reranking, hybrid search, query decomposition\n- Anthropic's Claude API: to...", + "Type: learning
4\n- Fullstack Open — University of Helsinki — 2022\n- AWS Solutions Architect Associate — 2023\n- Fast.ai Practical Deep Learning — 2024\n\n## Books Read\n...", + "Type: learning
- AWS Certified Solutions Architect — Associate (2023)\n- Google Professional Data Engineer (2024)\n- Certified Kubernetes Application Developer — CKAD ...", + "Type: personal
# Personal Profile\n\n## About Me\nName: Alex Johnson\nRole: Software Engineer & AI Enthusiast\nLocation: Tech Hub City\n\n## Background\nI am a passionate so...", + "Type: personal
since worked at two startups.\n\n## Skills\n- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n- Frameworks: React, FastAPI, LangChain, Gr...", + "Type: personal
ource projects, and mentoring aspiring developers.\nIn my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\nI run a sm...", + "Type: projects
# Projects Portfolio\n\n## AI-Powered Document Assistant\nA RAG-based system that helps users query large document collections efficiently.\nTech Stack: P...", + "Type: projects
erlapping chunks outperform large ones.\n\n## Real-time Analytics Dashboard\nBuilt a scalable dashboard for visualising business metrics in real-time.\nTe...", + "Type: projects
n AI assistant that provides automated code reviews and suggestions.\nTech Stack: Python, GitHub API, Claude API\nFeatures: Pattern detection, best prac...", + "Type: projects
PostgreSQL, ML forecasting\nFeatures: Receipt scanning via OCR, budget alerts, spending trend analysis\n\n## E-commerce Recommendation Engine\nBuilt a co..." + ], + "type": "scatter3d", + "x": { + "bdata": "Rt5bwi6NBkKfSnNC1gRiQZH7xUJLJ/g/L/fLQsdQfsKaOA/CTQJxwg==", + "dtype": "f4" + }, + "y": { + "bdata": "gfYWQv0Ja0KJfMDC0FO8QuoFAcNkdd3CpyMGQumGHkLlfdTCrrcvQw==", + "dtype": "f4" + }, + "z": { + "bdata": "FVmLwh/Vy8KQFcXCeCOIQhtWm0J6AqpCp5UXQlJCvUKRYMLCXcrmwg==", + "dtype": "f4" + } + } + ], + "layout": { + "height": 750, + "paper_bgcolor": "#0f0f17", + "scene": { + "bgcolor": "#1a1a28", + "xaxis": { + "title": { + "text": "Dim 1" + } + }, + "yaxis": { + "title": { + "text": "Dim 2" + } + }, + "zaxis": { + "title": { + "text": "Dim 3" + } + } + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "3D Vector Store Visualisation (t-SNE)" + }, + "width": 1000 + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Visualisations complete\n" + ] + } + ], + "source": [ + "\n", + "result = _collection.get(include=[\"embeddings\", \"documents\", \"metadatas\"])\n", + "vectors_np = np.array(result[\"embeddings\"])\n", + "doc_types = [m[\"doc_type\"] for m in result[\"metadatas\"]]\n", + "docs_text = result[\"documents\"]\n", + "\n", + "COLOR_MAP = {\"personal\": \"#7c6af7\", \"projects\": \"#f06292\", \"learning\": \"#4dd0e1\"}\n", + "colors = [COLOR_MAP.get(t, \"#aaa\") for t in doc_types]\n", + "\n", + "n = vectors_np.shape[0]\n", + "perplexity = max(5.0, min(30.0, (n - 1) / 3.0))\n", + "print(f\"Running t-SNE on {n} vectors (perplexity={perplexity:.1f})...\")\n", + "\n", + "\n", + "tsne_2d = TSNE(n_components=2, random_state=42, perplexity=perplexity, max_iter=1000)\n", + "rv_2d = tsne_2d.fit_transform(vectors_np)\n", + "\n", + "fig_2d = go.Figure(data=[go.Scatter(\n", + " x=rv_2d[:, 0], y=rv_2d[:, 1],\n", + " mode=\"markers\",\n", + " marker=dict(size=7, color=colors, opacity=0.85,\n", + " line=dict(width=0.5, color=\"white\")),\n", + " text=[f\"Type: {t}
{d[:150]}...\" for t, d in zip(doc_types, docs_text)],\n", + " hoverinfo=\"text\"\n", + ")])\n", + "fig_2d.update_layout(\n", + " title=\"2D Vector Store Visualisation (t-SNE)\",\n", + " xaxis_title=\"t-SNE Dimension 1\",\n", + " yaxis_title=\"t-SNE Dimension 2\",\n", + " width=900, height=600,\n", + " template=\"plotly_dark\",\n", + " paper_bgcolor=\"#0f0f17\",\n", + " plot_bgcolor=\"#1a1a28\",\n", + ")\n", + "fig_2d.show()\n", + "\n", + "\n", + "tsne_3d = TSNE(n_components=3, random_state=42, perplexity=perplexity, max_iter=1000)\n", + "rv_3d = tsne_3d.fit_transform(vectors_np)\n", + "\n", + "fig_3d = go.Figure(data=[go.Scatter3d(\n", + " x=rv_3d[:, 0], y=rv_3d[:, 1], z=rv_3d[:, 2],\n", + " mode=\"markers\",\n", + " marker=dict(size=5, color=colors, opacity=0.8),\n", + " text=[f\"Type: {t}
{d[:150]}...\" for t, d in zip(doc_types, docs_text)],\n", + " hoverinfo=\"text\"\n", + ")])\n", + "fig_3d.update_layout(\n", + " title=\"3D Vector Store Visualisation (t-SNE)\",\n", + " scene=dict(\n", + " xaxis_title=\"Dim 1\", yaxis_title=\"Dim 2\", zaxis_title=\"Dim 3\",\n", + " bgcolor=\"#1a1a28\"\n", + " ),\n", + " width=1000, height=750,\n", + " paper_bgcolor=\"#0f0f17\",\n", + ")\n", + "fig_3d.show()\n", + "\n", + "print(\"✅ Visualisations complete\")" + ] + }, + { + "cell_type": "markdown", + "id": "e86e4b35", + "metadata": {}, + "source": [ + "## 9. RAG Chain\n", + "\n", + "Query rewriting + retrieval + reranking + generation — all via Claude." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6805eec2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting nbformat\n", + " Using cached nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting fastjsonschema>=2.15 (from nbformat)\n", + " Using cached fastjsonschema-2.21.2-py3-none-any.whl.metadata (2.3 kB)\n", + "Requirement already satisfied: jsonschema>=2.6 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (4.26.0)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (5.9.1)\n", + "Requirement already satisfied: traitlets>=5.1 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (5.14.3)\n", + "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (25.4.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.25.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (0.30.0)\n", + "Requirement already satisfied: platformdirs>=2.5 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat) (4.9.2)\n", + "Requirement already satisfied: typing-extensions>=4.4.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from referencing>=0.28.4->jsonschema>=2.6->nbformat) (4.15.0)\n", + "Using cached nbformat-5.10.4-py3-none-any.whl (78 kB)\n", + "Using cached fastjsonschema-2.21.2-py3-none-any.whl (24 kB)\n", + "Installing collected packages: fastjsonschema, nbformat\n", + "Successfully installed fastjsonschema-2.21.2 nbformat-5.10.4\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 24.0 -> 26.0.1\n", + "[notice] To update, run: c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Scripts\\python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "pip install nbformat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24877f38", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RAG chain ready. Running smoke test...\n", + "\n", + "Q: What is the person's background?\n", + "A: Based on the profile document, here's the person's background:\n", + "\n", + "**Professional Background:**\n", + "- Currently works as a Senior Software Engineer at TechCorp\n", + "- Previously worked at two startups before joining TechCorp\n", + "\n", + "**Technical Skills:**\n", + "- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n", + "- Frameworks: React, FastAPI, LangChain, Gradio\n", + "- AI/ML: LLMs, RAG systems, Vector Databases, Prompt Engineering, Fine-tuning\n", + "- Databases: PostgreSQL, MongoDB, Chroma, Pinecone, Redis\n", + "- Cloud: AWS, GCP, Docker, Kubernetes\n", + "\n", + "**Certifications:**\n", + "- AWS Certified Solutions Architect — Associate (2023)\n", + "- Google Professional Data Engineer (2024)\n", + "- Certified Kubernetes Application Developer — CKAD (2023)\n", + "\n", + "**Personal Interests:**\n", + "- Exploring new technologies\n", + "- Contributing to open-source projects\n", + "- Mentoring aspiring developers\n", + "- Hiking and reading tech blogs\n", + "- Experimenting with new AI tools\n", + "- Runs a small tech blog with 2,000 monthly readers\n", + "\n", + "**Contact Information:**\n", + "- GitHub: github.com/alexj\n", + "- LinkedIn: linkedin.com/in/alexjohnson\n", + "- Email: alex@techmail.com\n", + "\n", + "*Source: personal/profile.md and learning/journey.md*\n", + "\n", + "_Sources: learning, personal, projects_\n" + ] + } + ], + "source": [ + "SYSTEM_PROMPT = textwrap.dedent(\"\"\"\n", + " You are a helpful personal knowledge assistant.\n", + " Answer questions accurately and concisely using ONLY the context provided.\n", + " If the answer is not in the context, say so honestly.\n", + " Always cite which source document your answer comes from.\n", + "\n", + " CONTEXT:\n", + " {context}\n", + "\"\"\")\n", + "\n", + "\n", + "def retrieve_chunks(query: str, k: int = TOP_K_RESULTS) -> list[dict]:\n", + " \"\"\"Embed the query and fetch the top-k chunks from ChromaDB.\"\"\"\n", + " q_vec = embed_texts([query])[0]\n", + " results = _collection.query(\n", + " query_embeddings=[q_vec],\n", + " n_results=min(k, _collection.count())\n", + " )\n", + " return [\n", + " {\"content\": doc, \"doc_type\": meta[\"doc_type\"], \"source\": meta[\"source\"]}\n", + " for doc, meta in zip(results[\"documents\"][0], results[\"metadatas\"][0])\n", + " ]\n", + "\n", + "\n", + "def chat(question: str, history: list) -> str:\n", + " \"\"\"\n", + " Full RAG pipeline:\n", + " 1. Retrieve relevant chunks\n", + " 2. Build context\n", + " 3. Generate answer with Claude (using full conversation history)\n", + " \"\"\"\n", + " global _chat_history\n", + "\n", + " if _collection is None:\n", + " return \"⚠️ Vector store not built yet — run the cells above first.\"\n", + "\n", + " try:\n", + " client = get_client()\n", + "\n", + " \n", + " chunks = retrieve_chunks(question)\n", + "\n", + " \n", + " context = \"\\n\\n---\\n\\n\".join(\n", + " f\"[{c['doc_type']} / {c['source']}]\\n{c['content']}\"\n", + " for c in chunks\n", + " )\n", + " sources = sorted(set(c[\"doc_type\"] for c in chunks))\n", + "\n", + " \n", + " response = client.messages.create(\n", + " model=ANTHROPIC_MODEL,\n", + " max_tokens=1024,\n", + " system=SYSTEM_PROMPT.format(context=context),\n", + " messages=_chat_history + [{\"role\": \"user\", \"content\": question}]\n", + " )\n", + " answer = response.content[0].text.strip()\n", + "\n", + " \n", + " _chat_history.append({\"role\": \"user\", \"content\": question})\n", + " _chat_history.append({\"role\": \"assistant\", \"content\": answer})\n", + "\n", + " return answer + f\"\\n\\n_Sources: {', '.join(sources)}_\"\n", + "\n", + " except Exception as e:\n", + " return f\"❌ Error: {e}\"\n", + "\n", + "\n", + "print(\"RAG chain ready. Running smoke test...\\n\")\n", + "_chat_history = []\n", + "test_q = \"What is the person's background?\"\n", + "print(f\"Q: {test_q}\")\n", + "print(f\"A: {chat(test_q, [])}\")" + ] + }, + { + "cell_type": "markdown", + "id": "24e0e4f7", + "metadata": {}, + "source": [ + "## 10. Gradio Chat Interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48799a64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Gradio interface configured\n" + ] + } + ], + "source": [ + "def create_gradio_interface():\n", + " \"\"\"Build and return the Gradio Blocks UI.\"\"\"\n", + "\n", + " THEME = gr.themes.Base(\n", + " primary_hue=\"violet\",\n", + " secondary_hue=\"purple\",\n", + " neutral_hue=\"slate\",\n", + " font=[gr.themes.GoogleFont(\"Inter\"), \"ui-sans-serif\", \"sans-serif\"],\n", + " ).set(\n", + " body_background_fill=\"#0f0f17\",\n", + " body_text_color=\"#e2e0f0\",\n", + " block_background_fill=\"#1a1a28\",\n", + " block_border_color=\"#2d2d45\",\n", + " block_title_text_color=\"#c4b5fd\",\n", + " input_background_fill=\"#12121e\",\n", + " button_primary_background_fill=\"linear-gradient(135deg, #7c3aed, #a855f7)\",\n", + " button_primary_text_color=\"#fff\",\n", + " )\n", + "\n", + " with gr.Blocks(theme=THEME) as ui:\n", + "\n", + " gr.HTML(\"\"\"\n", + "
\n", + "

\n", + " 🧠 Personal Knowledge Worker\n", + "

\n", + "

\n", + " Powered by Claude · RAG over your personal knowledge base\n", + "

\n", + "
\n", + " \"\"\")\n", + "\n", + " with gr.Tabs():\n", + "\n", + " with gr.Tab(\"💬 Chat\"):\n", + " gr.ChatInterface(\n", + " fn=chat,\n", + " title=\"\",\n", + " description=\"Ask anything about your personal data\",\n", + " examples=[\n", + " \"What is my background?\",\n", + " \"Tell me about my projects\",\n", + " \"What am I currently learning?\",\n", + " \"What are my main skills?\",\n", + " \"What certifications do I have?\",\n", + " ],\n", + " )\n", + "\n", + " \n", + " with gr.Tab(\"📁 Knowledge Base\"):\n", + " gr.Markdown(\"### Files in the knowledge base\")\n", + " kb_info = gr.Textbox(\n", + " value=\"\\n\".join(\n", + " f\"[{c['doc_type']}] {c['source']} ({len(c['content'])} chars)\"\n", + " for c in all_chunks[:20]\n", + " ) + (f\"\\n…and {len(all_chunks)-20} more chunks\" if len(all_chunks) > 20 else \"\"),\n", + " lines=15, interactive=False,\n", + " label=\"Chunks preview\"\n", + " )\n", + " gr.Markdown(\n", + " f\"**Total chunks:** {len(all_chunks)} · \"\n", + " f\"**Vector dimensions:** 128 · \"\n", + " f\"**Model:** {ANTHROPIC_MODEL}\"\n", + " )\n", + "\n", + " with gr.Tab(\"ℹ️ About\"):\n", + " gr.Markdown(\"\"\"\n", + "## RAG Pipeline\n", + "\n", + "### How it works\n", + "1. **Load** — Markdown files are read from `knowledge_base/personal/`, `projects/`, `learning/`\n", + "2. **Chunk** — Documents are split into 500-character overlapping chunks\n", + "3. **Embed** — Each chunk is embedded into a 128-dim vector via Claude\n", + "4. **Store** — Vectors are stored in an in-memory ChromaDB collection\n", + "5. **Retrieve** — At query time, the question is embedded and top-5 chunks retrieved by cosine similarity\n", + "6. **Generate** — Claude answers using only the retrieved context, with full conversation history\n", + "\n", + "### Tech stack\n", + "- **LLM & Embeddings**: Anthropic Claude (`claude-sonnet-4-20250514`)\n", + "- **Vector store**: ChromaDB (in-memory)\n", + "- **Visualisation**: t-SNE via scikit-learn + Plotly\n", + "- **UI**: Gradio\n", + "\n", + "### Tips\n", + "- Ask specific questions for better answers\n", + "- Follow-up questions work thanks to conversation memory\n", + "- Add your own `.md` files to the `knowledge_base/` subfolders and re-run the notebook\n", + " \"\"\")\n", + "\n", + " gr.HTML(\"\"\"\n", + "
\n", + " Week 5 Exercise · Personal Knowledge Worker · Built with Gradio + Claude\n", + "
\n", + " \"\"\")\n", + "\n", + " return ui\n", + "\n", + "print(\"✅ Gradio interface configured\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8e38c79e", + "metadata": {}, + "source": [ + "## 11. Launch the Application" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0916b56c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Lenovo\\AppData\\Local\\Temp\\ipykernel_20300\\2300027294.py:20: UserWarning: The parameters have been moved from the Blocks constructor to the launch() method in Gradio 6.0: theme. Please pass these parameters to launch() instead.\n", + " with gr.Blocks(theme=THEME) as ui:\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Launching Personal Knowledge Worker...\n", + "Open the URL shown below in your browser.\n", + "Press the Stop button in the notebook toolbar to shut down.\n", + "\n", + "* Running on local URL: http://127.0.0.1:7869\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "_chat_history = [] \n", + "\n", + "ui = create_gradio_interface()\n", + "\n", + "print(\"\\nLaunching Personal Knowledge Worker...\")\n", + "print(\"Open the URL shown below in your browser.\")\n", + "print(\"Press the Stop button in the notebook toolbar to shut down.\\n\")\n", + "\n", + "ui.launch(share=False, server_port=7869)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv (3.11.9)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ad3332f81e4c9413691dcd9c99c6c2912acf2ddb Mon Sep 17 00:00:00 2001 From: Jonas Thamane <166150947+NathiJonas@users.noreply.github.com> Date: Mon, 9 Mar 2026 19:49:03 +0200 Subject: [PATCH 2/2] Update Jonas Thamane Week 5 PR.ipynb Fixed the output issue. --- .../Jonas Thamane Week 5 PR.ipynb | 1976 +---------------- 1 file changed, 16 insertions(+), 1960 deletions(-) diff --git a/community-contributions/Jonas Thamane Week 5 PR.ipynb b/community-contributions/Jonas Thamane Week 5 PR.ipynb index de4141564d..2cc9a10821 100644 --- a/community-contributions/Jonas Thamane Week 5 PR.ipynb +++ b/community-contributions/Jonas Thamane Week 5 PR.ipynb @@ -30,20 +30,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ee0830c0", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 24.0 -> 26.0.1\n", - "[notice] To update, run: c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Scripts\\python.exe -m pip install --upgrade pip\n" - ] - } - ], + "outputs": [], "source": [ "import sys\n", "!{sys.executable} -m pip install -q gradio anthropic chromadb python-dotenv numpy plotly scikit-learn" @@ -62,24 +52,7 @@ "execution_count": null, "id": "fd5a77b7", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Anthropic API Key found: sk-ant-api03-me...\n", - "✅ Configuration ready\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import re\n", @@ -130,15 +103,7 @@ "execution_count": null, "id": "636ae5d6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Global state initialised\n" - ] - } - ], + "outputs": [], "source": [ "_client: Optional[anthropic.Anthropic] = None\n", "_chroma: Optional[chromadb.Client] = None\n", @@ -169,22 +134,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "ea9c4f6f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Exists: knowledge_base\\personal\\profile.md\n", - " Exists: knowledge_base\\projects\\portfolio.md\n", - " Exists: knowledge_base\\learning\\journey.md\n", - "\n", - "✅ Knowledge base ready at: C:\\Users\\Lenovo\\Downloads\\knowledge_base\n" - ] - } - ], + "outputs": [], "source": [ "def create_sample_knowledge_base():\n", " \"\"\"Create sample knowledge base with personal, projects, and learning data.\"\"\"\n", @@ -320,23 +273,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "4d2684a9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded 3 documents\n", - "Document types: ['learning', 'personal', 'projects']\n", - "\n", - "Total chunks: 10\n", - "Average chunk size: 460 characters\n", - "Chunks per doc type: {'projects': 4, 'learning': 3, 'personal': 3}\n" - ] - } - ], + "outputs": [], "source": [ "def load_documents() -> list[dict]:\n", " \"\"\"\n", @@ -411,15 +351,7 @@ "execution_count": null, "id": "a0c76d4b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Embedding functions defined\n" - ] - } - ], + "outputs": [], "source": [ "def _parse_embedding(raw: str) -> list[float]:\n", " \"\"\"Robustly extract a float array from Claude's response.\"\"\"\n", @@ -508,18 +440,7 @@ "execution_count": null, "id": "54ca1938", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Embedding 10 chunks (this may take a minute)...\n", - "\n", - "✅ Vector store ready — 10 chunks stored\n", - " Embedding dimensions: 128\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"Embedding {len(all_chunks)} chunks (this may take a minute)...\")\n", "\n", @@ -565,1744 +486,7 @@ "execution_count": null, "id": "f38ffcd8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running t-SNE on 10 vectors (perplexity=5.0)...\n" - ] - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hoverinfo": "text", - "marker": { - "color": [ - "#4dd0e1", - "#4dd0e1", - "#4dd0e1", - "#7c6af7", - "#7c6af7", - "#7c6af7", - "#f06292", - "#f06292", - "#f06292", - "#f06292" - ], - "line": { - "color": "white", - "width": 0.5 - }, - "opacity": 0.85, - "size": 7 - }, - "mode": "markers", - "text": [ - "Type: learning
# Learning Journey\n\n## Currently Learning (2025)\n- Advanced RAG techniques: reranking, hybrid search, query decomposition\n- Anthropic's Claude API: to...", - "Type: learning
4\n- Fullstack Open — University of Helsinki — 2022\n- AWS Solutions Architect Associate — 2023\n- Fast.ai Practical Deep Learning — 2024\n\n## Books Read\n...", - "Type: learning
- AWS Certified Solutions Architect — Associate (2023)\n- Google Professional Data Engineer (2024)\n- Certified Kubernetes Application Developer — CKAD ...", - "Type: personal
# Personal Profile\n\n## About Me\nName: Alex Johnson\nRole: Software Engineer & AI Enthusiast\nLocation: Tech Hub City\n\n## Background\nI am a passionate so...", - "Type: personal
since worked at two startups.\n\n## Skills\n- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n- Frameworks: React, FastAPI, LangChain, Gr...", - "Type: personal
ource projects, and mentoring aspiring developers.\nIn my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\nI run a sm...", - "Type: projects
# Projects Portfolio\n\n## AI-Powered Document Assistant\nA RAG-based system that helps users query large document collections efficiently.\nTech Stack: P...", - "Type: projects
erlapping chunks outperform large ones.\n\n## Real-time Analytics Dashboard\nBuilt a scalable dashboard for visualising business metrics in real-time.\nTe...", - "Type: projects
n AI assistant that provides automated code reviews and suggestions.\nTech Stack: Python, GitHub API, Claude API\nFeatures: Pattern detection, best prac...", - "Type: projects
PostgreSQL, ML forecasting\nFeatures: Receipt scanning via OCR, budget alerts, spending trend analysis\n\n## E-commerce Recommendation Engine\nBuilt a co..." - ], - "type": "scatter", - "x": { - "bdata": "VFF9winPucKlebdB6VmIwrQdVUIaBgVC0CFxwgBTHML2SUlCqfk8QQ==", - "dtype": "f4" - }, - "y": { - "bdata": "2bg+wq+WxsGq+ITB042bQgc66ELOwhZDIH/9Ql7QTEJY1SFBXGejQg==", - "dtype": "f4" - } - } - ], - "layout": { - "height": 600, - "paper_bgcolor": "#0f0f17", - "plot_bgcolor": "#1a1a28", - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#f2f5fa" - }, - "error_y": { - "color": "#f2f5fa" - }, - "marker": { - "line": { - "color": "rgb(17,17,17)", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "rgb(17,17,17)", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#A2B1C6", - "gridcolor": "#506784", - "linecolor": "#506784", - "minorgridcolor": "#506784", - "startlinecolor": "#A2B1C6" - }, - "baxis": { - "endlinecolor": "#A2B1C6", - "gridcolor": "#506784", - "linecolor": "#506784", - "minorgridcolor": "#506784", - "startlinecolor": "#A2B1C6" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "marker": { - "line": { - "color": "#283442" - } - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "line": { - "color": "#283442" - } - }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#506784" - }, - "line": { - "color": "rgb(17,17,17)" - } - }, - "header": { - "fill": { - "color": "#2a3f5f" - }, - "line": { - "color": "rgb(17,17,17)" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#f2f5fa", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#f2f5fa" - }, - "geo": { - "bgcolor": "rgb(17,17,17)", - "lakecolor": "rgb(17,17,17)", - "landcolor": "rgb(17,17,17)", - "showlakes": true, - "showland": true, - "subunitcolor": "#506784" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "dark" - }, - "paper_bgcolor": "rgb(17,17,17)", - "plot_bgcolor": "rgb(17,17,17)", - "polar": { - "angularaxis": { - "gridcolor": "#506784", - "linecolor": "#506784", - "ticks": "" - }, - "bgcolor": "rgb(17,17,17)", - "radialaxis": { - "gridcolor": "#506784", - "linecolor": "#506784", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "rgb(17,17,17)", - "gridcolor": "#506784", - "gridwidth": 2, - "linecolor": "#506784", - "showbackground": true, - "ticks": "", - "zerolinecolor": "#C8D4E3" - }, - "yaxis": { - "backgroundcolor": "rgb(17,17,17)", - "gridcolor": "#506784", - "gridwidth": 2, - "linecolor": "#506784", - "showbackground": true, - "ticks": "", - "zerolinecolor": "#C8D4E3" - }, - "zaxis": { - "backgroundcolor": "rgb(17,17,17)", - "gridcolor": "#506784", - "gridwidth": 2, - "linecolor": "#506784", - "showbackground": true, - "ticks": "", - "zerolinecolor": "#C8D4E3" - } - }, - "shapedefaults": { - "line": { - "color": "#f2f5fa" - } - }, - "sliderdefaults": { - "bgcolor": "#C8D4E3", - "bordercolor": "rgb(17,17,17)", - "borderwidth": 1, - "tickwidth": 0 - }, - "ternary": { - "aaxis": { - "gridcolor": "#506784", - "linecolor": "#506784", - "ticks": "" - }, - "baxis": { - "gridcolor": "#506784", - "linecolor": "#506784", - "ticks": "" - }, - "bgcolor": "rgb(17,17,17)", - "caxis": { - "gridcolor": "#506784", - "linecolor": "#506784", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "updatemenudefaults": { - "bgcolor": "#506784", - "borderwidth": 0 - }, - "xaxis": { - "automargin": true, - "gridcolor": "#283442", - "linecolor": "#506784", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "#283442", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "#283442", - "linecolor": "#506784", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "#283442", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "2D Vector Store Visualisation (t-SNE)" - }, - "width": 900, - "xaxis": { - "title": { - "text": "t-SNE Dimension 1" - } - }, - "yaxis": { - "title": { - "text": "t-SNE Dimension 2" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hoverinfo": "text", - "marker": { - "color": [ - "#4dd0e1", - "#4dd0e1", - "#4dd0e1", - "#7c6af7", - "#7c6af7", - "#7c6af7", - "#f06292", - "#f06292", - "#f06292", - "#f06292" - ], - "opacity": 0.8, - "size": 5 - }, - "mode": "markers", - "text": [ - "Type: learning
# Learning Journey\n\n## Currently Learning (2025)\n- Advanced RAG techniques: reranking, hybrid search, query decomposition\n- Anthropic's Claude API: to...", - "Type: learning
4\n- Fullstack Open — University of Helsinki — 2022\n- AWS Solutions Architect Associate — 2023\n- Fast.ai Practical Deep Learning — 2024\n\n## Books Read\n...", - "Type: learning
- AWS Certified Solutions Architect — Associate (2023)\n- Google Professional Data Engineer (2024)\n- Certified Kubernetes Application Developer — CKAD ...", - "Type: personal
# Personal Profile\n\n## About Me\nName: Alex Johnson\nRole: Software Engineer & AI Enthusiast\nLocation: Tech Hub City\n\n## Background\nI am a passionate so...", - "Type: personal
since worked at two startups.\n\n## Skills\n- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n- Frameworks: React, FastAPI, LangChain, Gr...", - "Type: personal
ource projects, and mentoring aspiring developers.\nIn my free time I enjoy hiking, reading tech blogs, and experimenting with new AI tools.\nI run a sm...", - "Type: projects
# Projects Portfolio\n\n## AI-Powered Document Assistant\nA RAG-based system that helps users query large document collections efficiently.\nTech Stack: P...", - "Type: projects
erlapping chunks outperform large ones.\n\n## Real-time Analytics Dashboard\nBuilt a scalable dashboard for visualising business metrics in real-time.\nTe...", - "Type: projects
n AI assistant that provides automated code reviews and suggestions.\nTech Stack: Python, GitHub API, Claude API\nFeatures: Pattern detection, best prac...", - "Type: projects
PostgreSQL, ML forecasting\nFeatures: Receipt scanning via OCR, budget alerts, spending trend analysis\n\n## E-commerce Recommendation Engine\nBuilt a co..." - ], - "type": "scatter3d", - "x": { - "bdata": "Rt5bwi6NBkKfSnNC1gRiQZH7xUJLJ/g/L/fLQsdQfsKaOA/CTQJxwg==", - "dtype": "f4" - }, - "y": { - "bdata": "gfYWQv0Ja0KJfMDC0FO8QuoFAcNkdd3CpyMGQumGHkLlfdTCrrcvQw==", - "dtype": "f4" - }, - "z": { - "bdata": "FVmLwh/Vy8KQFcXCeCOIQhtWm0J6AqpCp5UXQlJCvUKRYMLCXcrmwg==", - "dtype": "f4" - } - } - ], - "layout": { - "height": 750, - "paper_bgcolor": "#0f0f17", - "scene": { - "bgcolor": "#1a1a28", - "xaxis": { - "title": { - "text": "Dim 1" - } - }, - "yaxis": { - "title": { - "text": "Dim 2" - } - }, - "zaxis": { - "title": { - "text": "Dim 3" - } - } - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "3D Vector Store Visualisation (t-SNE)" - }, - "width": 1000 - } - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Visualisations complete\n" - ] - } - ], + "outputs": [], "source": [ "\n", "result = _collection.get(include=[\"embeddings\", \"documents\", \"metadatas\"])\n", @@ -2377,44 +561,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "6805eec2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting nbformat\n", - " Using cached nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)\n", - "Collecting fastjsonschema>=2.15 (from nbformat)\n", - " Using cached fastjsonschema-2.21.2-py3-none-any.whl.metadata (2.3 kB)\n", - "Requirement already satisfied: jsonschema>=2.6 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (4.26.0)\n", - "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (5.9.1)\n", - "Requirement already satisfied: traitlets>=5.1 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from nbformat) (5.14.3)\n", - "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (25.4.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (2025.9.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (0.37.0)\n", - "Requirement already satisfied: rpds-py>=0.25.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jsonschema>=2.6->nbformat) (0.30.0)\n", - "Requirement already satisfied: platformdirs>=2.5 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat) (4.9.2)\n", - "Requirement already satisfied: typing-extensions>=4.4.0 in c:\\users\\lenovo\\projects\\llm_engineering\\.venv\\lib\\site-packages (from referencing>=0.28.4->jsonschema>=2.6->nbformat) (4.15.0)\n", - "Using cached nbformat-5.10.4-py3-none-any.whl (78 kB)\n", - "Using cached fastjsonschema-2.21.2-py3-none-any.whl (24 kB)\n", - "Installing collected packages: fastjsonschema, nbformat\n", - "Successfully installed fastjsonschema-2.21.2 nbformat-5.10.4\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 24.0 -> 26.0.1\n", - "[notice] To update, run: c:\\Users\\Lenovo\\projects\\llm_engineering\\.venv\\Scripts\\python.exe -m pip install --upgrade pip\n" - ] - } - ], + "outputs": [], "source": [ "pip install nbformat" ] @@ -2424,51 +574,7 @@ "execution_count": null, "id": "24877f38", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RAG chain ready. Running smoke test...\n", - "\n", - "Q: What is the person's background?\n", - "A: Based on the profile document, here's the person's background:\n", - "\n", - "**Professional Background:**\n", - "- Currently works as a Senior Software Engineer at TechCorp\n", - "- Previously worked at two startups before joining TechCorp\n", - "\n", - "**Technical Skills:**\n", - "- Programming Languages: Python, JavaScript, TypeScript, Go, Rust\n", - "- Frameworks: React, FastAPI, LangChain, Gradio\n", - "- AI/ML: LLMs, RAG systems, Vector Databases, Prompt Engineering, Fine-tuning\n", - "- Databases: PostgreSQL, MongoDB, Chroma, Pinecone, Redis\n", - "- Cloud: AWS, GCP, Docker, Kubernetes\n", - "\n", - "**Certifications:**\n", - "- AWS Certified Solutions Architect — Associate (2023)\n", - "- Google Professional Data Engineer (2024)\n", - "- Certified Kubernetes Application Developer — CKAD (2023)\n", - "\n", - "**Personal Interests:**\n", - "- Exploring new technologies\n", - "- Contributing to open-source projects\n", - "- Mentoring aspiring developers\n", - "- Hiking and reading tech blogs\n", - "- Experimenting with new AI tools\n", - "- Runs a small tech blog with 2,000 monthly readers\n", - "\n", - "**Contact Information:**\n", - "- GitHub: github.com/alexj\n", - "- LinkedIn: linkedin.com/in/alexjohnson\n", - "- Email: alex@techmail.com\n", - "\n", - "*Source: personal/profile.md and learning/journey.md*\n", - "\n", - "_Sources: learning, personal, projects_\n" - ] - } - ], + "outputs": [], "source": [ "SYSTEM_PROMPT = textwrap.dedent(\"\"\"\n", " You are a helpful personal knowledge assistant.\n", @@ -2558,15 +664,7 @@ "execution_count": null, "id": "48799a64", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Gradio interface configured\n" - ] - } - ], + "outputs": [], "source": [ "def create_gradio_interface():\n", " \"\"\"Build and return the Gradio Blocks UI.\"\"\"\n", @@ -2684,49 +782,7 @@ "execution_count": null, "id": "0916b56c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lenovo\\AppData\\Local\\Temp\\ipykernel_20300\\2300027294.py:20: UserWarning: The parameters have been moved from the Blocks constructor to the launch() method in Gradio 6.0: theme. Please pass these parameters to launch() instead.\n", - " with gr.Blocks(theme=THEME) as ui:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Launching Personal Knowledge Worker...\n", - "Open the URL shown below in your browser.\n", - "Press the Stop button in the notebook toolbar to shut down.\n", - "\n", - "* Running on local URL: http://127.0.0.1:7869\n", - "* To create a public link, set `share=True` in `launch()`.\n" - ] - }, - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "_chat_history = [] \n", "\n",