diff --git a/.github/workflows/notebook-validation.yml b/.github/workflows/notebook-validation.yml
new file mode 100644
index 0000000..0674575
--- /dev/null
+++ b/.github/workflows/notebook-validation.yml
@@ -0,0 +1,37 @@
+name: Validate notebooks
+
+on:
+  pull_request:
+    branches: [master]
+    paths:  # NOTE(review): scripts/validate_notebooks.py globs only templates/*.ipynb and solutions/*.ipynb, so nested notebooks trigger a run but are not checked
+      - "templates/**/*.ipynb"
+      - "solutions/**/*.ipynb"
+      - "scripts/validate_notebooks.py"
+      - ".github/workflows/notebook-validation.yml"
+  push:
+    branches: [master]
+    paths:  # same filter as the pull_request trigger above; keep the two lists in sync
+      - "templates/**/*.ipynb"
+      - "solutions/**/*.ipynb"
+      - "scripts/validate_notebooks.py"
+      - ".github/workflows/notebook-validation.yml"
+  workflow_dispatch:  # also allow manual runs
+
+jobs:
+  validate-notebooks:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install nbformat
+        run: pip install nbformat  # without nbformat the script silently skips its nbformat.validate pass
+
+      - name: Validate notebooks
+        run: python scripts/validate_notebooks.py  # no --fix: any schema or nbformat error exits 1 and fails the job
diff --git a/scripts/add_colab_badges.py b/scripts/add_colab_badges.py
index e6f0c1a..6289ddb 100644
--- a/scripts/add_colab_badges.py
+++ b/scripts/add_colab_badges.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Add 'Open in Colab' badges to all template and solution notebooks."""
+"""Add 'Open in Colab' links to all template and solution notebooks."""
 
 import json
 from pathlib import Path
@@ -9,7 +9,6 @@
 ROOT = Path(__file__).resolve().parent.parent
 TEMPLATES_DIR = ROOT / "templates"
 SOLUTIONS_DIR = ROOT / "solutions"
-BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg"
 
 
 def colab_url(filename: str, folder: str) -> str:
@@ -19,8 +18,8 @@ def colab_url(filename: str, folder: str) -> str:
     )
 
 
-def badge_markdown(filename: str, folder: str) -> str:
-    return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename, folder)})"
+def colab_markdown(filename: str, folder: str) -> str:  # Markdown text link (no badge image) to colab_url(filename, folder)
+    return f"[Open in Colab]({colab_url(filename, folder)})"
 
 
 def process_notebook(path: Path, folder: str) -> bool:
@@ -33,11 +32,11 @@ def process_notebook(path: Path, folder: str) -> bool:
 
     source_lines = cells[0]["source"]
     flat = "".join(source_lines) if isinstance(source_lines, list) else source_lines
-    if "colab-badge.svg" in flat:
+    if "colab.research.google.com/github/" in flat:
         return False
 
-    badge = badge_markdown(path.name, folder)
-    cells[0]["source"] = [badge + "\n\n"] + (
+    link = colab_markdown(path.name, folder)
+    cells[0]["source"] = [link + "\n\n"] + (
         source_lines if isinstance(source_lines, list) else [source_lines]
     )
 
diff --git a/scripts/validate_notebooks.py b/scripts/validate_notebooks.py
new file mode 100644
index 0000000..1ae3954
--- /dev/null
+++ b/scripts/validate_notebooks.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+"""Validate and optionally repair notebook cell schemas."""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+from pathlib import Path
+import re
+from typing import Any
+
+
+ROOT = Path(__file__).resolve().parent.parent  # two levels above this script file
+NOTEBOOK_GLOBS = ("templates/*.ipynb", "solutions/*.ipynb")  # relative to ROOT, non-recursive
+CODE_ONLY_FIELDS = ("outputs", "execution_count")  # reported (and removed by --fix) on non-code cells
+CELL_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}\Z")  # nbformat 4.5 ids: 1-64 chars; \Z rejects a trailing newline
+
+
+def notebook_paths() -> list[Path]:
+    """Return every path under ROOT matching NOTEBOOK_GLOBS, in sorted order."""
+    return sorted(
+        match for pattern in NOTEBOOK_GLOBS for match in ROOT.glob(pattern)
+    )
+
+
+def load_notebook(path: Path) -> dict[str, Any]:
+    """Parse the UTF-8 JSON document at *path* and return the result."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def write_notebook(path: Path, notebook: dict[str, Any]) -> None:
+    """Write *notebook* to *path* as indent-1 UTF-8 JSON with a trailing newline."""
+    text = json.dumps(notebook, ensure_ascii=False, indent=1) + "\n"  # serialize before opening the file,
+    path.write_text(text, encoding="utf-8")  # so an encoding failure cannot truncate the existing notebook
+
+
+def source_text(cell: dict[str, Any]) -> str:
+    """Return the cell's ``source`` as one string, joining list-form sources."""
+    raw = cell.get("source", "")
+    parts = raw if isinstance(raw, list) else [raw]
+    return "".join(map(str, parts))
+
+
+def stable_cell_id(path: Path, index: int, cell: dict[str, Any], used: set[str]) -> str:
+    """Return a deterministic ``cell-<12 hex>`` id, suffixed if already in *used*."""
+    # Hash the ROOT-relative path, position, type and source so that an
+    # unchanged cell maps to the same id on every run.
+    kind = cell.get("cell_type", "")
+    seed = f"{path.relative_to(ROOT)}:{index}:{kind}:{source_text(cell)}"
+    base = "cell-" + hashlib.sha1(seed.encode("utf-8")).hexdigest()[:12]
+    candidate, attempt = base, 0
+    while candidate in used:
+        attempt += 1
+        candidate = f"{base}-{attempt}"
+    return candidate
+
+
+def sanitize_notebook(path: Path, fix: bool) -> list[str]:
+    """Check one notebook's cell schema and, with *fix*, repair it in place.
+
+    Reports missing, invalid and duplicate cell ids, cell ids in a notebook
+    whose ``nbformat_minor`` is below 5, and code-only fields on non-code
+    cells. With *fix*, offending ids are regenerated, the stray fields are
+    deleted, the format version is raised where ids require it, and the
+    file is rewritten if anything changed.
+
+    Returns the problem messages, including those that were repaired.
+    """
+    notebook = load_notebook(path)
+    problems: list[str] = []
+    seen: set[str] = set()
+    minor = int(notebook.get("nbformat_minor", 0))
+    dirty = bump_minor = version_reported = False
+
+    def report(detail: str) -> None:
+        # Build the label only when needed, as each message is recorded.
+        problems.append(f"{path.relative_to(ROOT)}{detail}")
+
+    for position, cell in enumerate(notebook.get("cells", [])):
+        current = cell.get("id")
+        if not isinstance(current, str) or not current:
+            id_problem = "missing cell id"
+        elif not CELL_ID_RE.match(current):
+            id_problem = "invalid cell id"
+        else:
+            id_problem = None
+
+        if id_problem is not None:
+            report(f" cell {position}: {id_problem}")
+            bump_minor = True
+            if fix:
+                current = cell["id"] = stable_cell_id(path, position, cell, seen)
+                dirty = True
+
+        if isinstance(current, str):
+            if current in seen:
+                report(f" cell {position}: duplicate cell id")
+                if fix:
+                    current = cell["id"] = stable_cell_id(path, position, cell, seen)
+                    dirty = True
+            seen.add(current)
+
+        # Cells carrying ids need nbformat_minor >= 5; report that once per file.
+        if isinstance(current, str) and current and minor < 5 and not version_reported:
+            report(": cell ids require nbformat_minor >= 5")
+            version_reported = True
+            if fix:
+                bump_minor = dirty = True
+
+        if cell.get("cell_type") == "code":
+            continue
+        for field in CODE_ONLY_FIELDS:
+            if field not in cell:
+                continue
+            report(f" cell {position}: non-code cell contains '{field}'")
+            if fix:
+                del cell[field]
+                dirty = True
+
+    if dirty:
+        if bump_minor:
+            notebook["nbformat"] = 4
+            notebook["nbformat_minor"] = max(minor, 5)
+        write_notebook(path, notebook)
+
+    return problems
+
+
+def validate_with_nbformat(path: Path) -> str | None:
+    """Return nbformat's error for *path*, or None if valid or nbformat is absent."""
+    try:
+        import nbformat
+    except ImportError:
+        return None
+
+    try:
+        nbformat.validate(nbformat.read(path, as_version=4))
+    except Exception as exc:  # pragma: no cover - message is for CLI output
+        return f"{path.relative_to(ROOT)}: {exc}"
+    else:
+        return None
+
+
+def main() -> int:
+    """Run the validator CLI; return 0 on success and 1 when errors remain."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--fix",
+        action="store_true",
+        help="repair missing/invalid ids and remove code-only fields from non-code cells",
+    )
+    options = cli.parse_args()
+
+    schema_issues = [
+        issue
+        for notebook in notebook_paths()
+        for issue in sanitize_notebook(notebook, options.fix)
+    ]
+    if schema_issues and not options.fix:
+        print("Notebook schema errors:")
+        print("\n".join(schema_issues))
+        print("\nRun scripts/validate_notebooks.py --fix to repair them.")
+        return 1
+
+    # The files are read again here, so nbformat sees any repairs made above.
+    reports = (validate_with_nbformat(notebook) for notebook in notebook_paths())
+    nbformat_issues = [report for report in reports if report]
+    if nbformat_issues:
+        print("nbformat validation errors:")
+        print("\n".join(nbformat_issues))
+        return 1
+
+    if schema_issues:
+        summary = f"Fixed {len(schema_issues)} notebook schema issue(s)."
+    else:
+        summary = "All notebooks passed validation."
+
+    print(summary)
+    return 0
+
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/solutions/01_relu_solution.ipynb b/solutions/01_relu_solution.ipynb
index 085e3ab..9f95fe9 100644
--- a/solutions/01_relu_solution.ipynb
+++ b/solutions/01_relu_solution.ipynb
@@ -5,7 +5,7 @@
    "id": "0556419b",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n",
     "\n",
     "# 🟢 Solution: Implement ReLU\n",
     "\n",
@@ -26,7 +26,8 @@
     "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
     "except ImportError:\n",
     "    pass\n"
-   ]
+   ],
+   "id": "cell-8b01cc47c4eb"
   },
   {
    "cell_type": "code",
@@ -35,7 +36,8 @@
    "outputs": [],
    "source": [
     "import torch"
-   ]
+   ],
+   "id": "cell-42aacb4e5964"
   },
   {
    "cell_type": "code",
@@ -47,7 +49,8 @@
     "\n",
     "def relu(x: torch.Tensor) -> torch.Tensor:\n",
     "    return x * (x > 0).float()"
-   ]
+   ],
+   "id": "cell-77d0ad2d5301"
   },
   {
    "cell_type": "code",
@@ -59,7 +62,8 @@
     "x = torch.tensor([-2., -1., 0., 1., 2.])\n",
     "print(\"Input: \", x)\n",
     "print(\"Output:\", relu(x))"
-   ]
+   ],
+   "id": "cell-ee0dd6b7c97c"
   },
   {
    "cell_type": "code",
@@ -70,7 +74,8 @@
     "# Run judge\n",
     "from torch_judge import check\n",
     "check(\"relu\")"
-   ]
+   ],
+   "id": "cell-a93501c6f94f"
   }
  ],
  "metadata": {
diff --git a/solutions/02_softmax_solution.ipynb b/solutions/02_softmax_solution.ipynb
index 902106e..c6a451d 100644
--- a/solutions/02_softmax_solution.ipynb
+++ b/solutions/02_softmax_solution.ipynb
@@ -17,14 +17,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n",
     "# 🟢 Solution: Implement Softmax\n",
     "\n",
     "Reference solution for the numerically-stable Softmax function.\n",
     "\n",
     "$$\\text{softmax}(x_i) = \\frac{e^{x_i - \\max(x)}}{\\sum_j e^{x_j - \\max(x)}}$$"
    ],
-   "outputs": []
+   "id": "cell-1b2118d19858"
   },
   {
    "cell_type": "code",
@@ -38,7 +38,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-23f1a6c527b4"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
     "import torch"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-01a8eba71de9"
   },
   {
    "cell_type": "code",
@@ -61,7 +63,8 @@
     "    return e_x / e_x.sum(dim=dim, keepdim=True)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-455503eacc0f"
   },
   {
    "cell_type": "code",
@@ -74,7 +77,8 @@
     "print(\"Ref:   \", torch.softmax(x, dim=-1))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-10c0561ea637"
   },
   {
    "cell_type": "code",
@@ -85,7 +89,8 @@
     "check(\"softmax\")"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ff8e53cdd120"
   }
  ]
 }
diff --git a/solutions/03_linear_solution.ipynb b/solutions/03_linear_solution.ipynb
index 4f25a88..7ebd49c 100644
--- a/solutions/03_linear_solution.ipynb
+++ b/solutions/03_linear_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n",
     "# 🟡 Solution: Simple Linear Layer\n",
     "\n",
     "Reference solution for a fully-connected linear layer: **y = xW^T + b**"
    ],
-   "outputs": []
+   "id": "cell-292929ae7f3e"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bde97d16cfd7"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4bdd71596832"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "        return x @ self.weight.T + self.bias"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8f2221e4cfc8"
   },
   {
    "cell_type": "code",
@@ -78,7 +81,8 @@
     "print(\"Output shape:\", layer.forward(x).shape)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bb7edb8373a5"
   },
   {
    "cell_type": "code",
@@ -89,7 +93,8 @@
     "check(\"linear\")"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-9ee6e790f9c4"
   }
  ]
 }
diff --git a/solutions/04_layernorm_solution.ipynb b/solutions/04_layernorm_solution.ipynb
index 59ac87c..9b2c501 100644
--- a/solutions/04_layernorm_solution.ipynb
+++ b/solutions/04_layernorm_solution.ipynb
@@ -17,14 +17,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n",
     "# 🟡 Solution: Implement LayerNorm\n",
     "\n",
     "Reference solution for Layer Normalization.\n",
     "\n",
     "$$\\text{LayerNorm}(x) = \\gamma \\cdot \\frac{x - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} + \\beta$$"
    ],
-   "outputs": []
+   "id": "cell-cc810d3dd609"
   },
   {
    "cell_type": "code",
@@ -38,7 +38,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-380c8f4b79b5"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
     "import torch"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a8430847acb3"
   },
   {
    "cell_type": "code",
@@ -62,7 +64,8 @@
     "    return gamma * x_norm + beta"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-592a4ee9f6b8"
   },
   {
    "cell_type": "code",
@@ -77,7 +80,8 @@
     "print(\"Match ref?\", torch.allclose(out, ref, atol=1e-4))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4bcbd3a0190d"
   },
   {
    "cell_type": "code",
@@ -88,7 +92,8 @@
     "check(\"layernorm\")"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-69b499f00af4"
   }
  ]
 }
diff --git a/solutions/05_attention_solution.ipynb b/solutions/05_attention_solution.ipynb
index e82f45f..e2b5b10 100644
--- a/solutions/05_attention_solution.ipynb
+++ b/solutions/05_attention_solution.ipynb
@@ -5,7 +5,7 @@
    "id": "5f63d076",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Softmax Attention\n",
     "\n",
     "Reference solution for the core Transformer attention mechanism.\n",
@@ -36,7 +36,8 @@
    "source": [
     "import torch\n",
     "import math"
-   ]
+   ],
+   "id": "cell-caa94740fb9a"
   },
   {
    "cell_type": "code",
@@ -76,7 +77,8 @@
     "V2 = torch.randn(1, 5, 32)\n",
     "out2 = scaled_dot_product_attention(Q2, K2, V2)\n",
     "print(\"Cross-attention shape:\", out2.shape, \"(expected: 1, 3, 32)\")"
-   ]
+   ],
+   "id": "cell-076c9d2a43e9"
   },
   {
    "cell_type": "code",
@@ -87,7 +89,8 @@
     "# Run judge\n",
     "from torch_judge import check\n",
     "check(\"attention\")"
-   ]
+   ],
+   "id": "cell-490c36a20e6e"
   }
  ],
  "metadata": {
diff --git a/solutions/06_multihead_attention_solution.ipynb b/solutions/06_multihead_attention_solution.ipynb
index 7ed4ad4..8ec2b73 100644
--- a/solutions/06_multihead_attention_solution.ipynb
+++ b/solutions/06_multihead_attention_solution.ipynb
@@ -4,13 +4,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Multi-Head Attention\n",
     "\n",
     "Reference solution for the Multi-Head Attention mechanism.\n",
     "\n",
     "$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$"
-   ]
+   ],
+   "id": "cell-028370b72535"
   },
   {
    "cell_type": "code",
@@ -24,7 +25,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-908cc63a33f7"
   },
   {
    "cell_type": "code",
@@ -35,7 +37,8 @@
     "import torch\n",
     "import torch.nn as nn\n",
     "import math"
-   ]
+   ],
+   "id": "cell-8ece6c477a3f"
   },
   {
    "cell_type": "code",
@@ -90,7 +93,8 @@
     "V = torch.randn(1, 7, 32)\n",
     "out2 = mha.forward(Q, K, V)\n",
     "print(\"Cross-attn shape:\", out2.shape)"
-   ]
+   ],
+   "id": "cell-2a0617b81206"
   },
   {
    "cell_type": "code",
@@ -101,7 +105,8 @@
     "# Run judge\n",
     "from torch_judge import check\n",
     "check(\"mha\")"
-   ]
+   ],
+   "id": "cell-0003f3ca234b"
   }
  ],
  "metadata": {
diff --git a/solutions/07_batchnorm_solution.ipynb b/solutions/07_batchnorm_solution.ipynb
index a108cc1..6538a23 100644
--- a/solutions/07_batchnorm_solution.ipynb
+++ b/solutions/07_batchnorm_solution.ipynb
@@ -5,7 +5,7 @@
    "id": "ffd42526",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n",
     "# 🟡 Solution: Implement BatchNorm\n",
     "\n",
     "Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates."
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d351072ade14"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "outputs": [],
    "source": [
     "import torch"
-   ]
+   ],
+   "id": "cell-5d5e4a552d3b"
   },
   {
    "cell_type": "code",
@@ -113,7 +115,8 @@
    "source": [
     "from torch_judge import check\n",
     "check('batchnorm')"
-   ]
+   ],
+   "id": "cell-d82b40692367"
   }
  ],
  "metadata": {
diff --git a/solutions/08_rmsnorm_solution.ipynb b/solutions/08_rmsnorm_solution.ipynb
index 0d58056..300c7e2 100644
--- a/solutions/08_rmsnorm_solution.ipynb
+++ b/solutions/08_rmsnorm_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n",
     "# 🟡 Solution: Implement RMSNorm\n",
     "\n",
     "Reference solution for Root Mean Square Normalization."
    ],
-   "outputs": []
+   "id": "cell-57b348142d0b"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e6f3d2d28e19"
   },
   {
    "cell_type": "code",
@@ -45,7 +46,8 @@
     "import torch"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-94d24134aa71"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "    return x / rms * weight"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-af5101f0d85b"
   },
   {
    "cell_type": "code",
@@ -69,7 +72,8 @@
     "print('RMS of output:', out.pow(2).mean(dim=-1).sqrt())"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a34a6cbd5af6"
   },
   {
    "cell_type": "code",
@@ -79,7 +83,8 @@
     "check('rmsnorm')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a29bcc88c41f"
   }
  ]
 }
diff --git a/solutions/09_causal_attention_solution.ipynb b/solutions/09_causal_attention_solution.ipynb
index 995bcfb..52cf154 100644
--- a/solutions/09_causal_attention_solution.ipynb
+++ b/solutions/09_causal_attention_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Causal Self-Attention\n",
     "\n",
     "Reference solution — softmax attention with an upper-triangular mask."
    ],
-   "outputs": []
+   "id": "cell-c1de2705a1a5"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-142ed06bfb76"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-00d287977000"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "    return torch.bmm(weights, V)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-288ac0183691"
   },
   {
    "cell_type": "code",
@@ -79,7 +82,8 @@
     "print(\"Pos 0 == V[0]?\", torch.allclose(out[:, 0], V[:, 0], atol=1e-5))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ce0ed55dc29a"
   },
   {
    "cell_type": "code",
@@ -89,7 +93,8 @@
     "check('causal_attention')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5aae090f98f0"
   }
  ]
 }
diff --git a/solutions/10_gqa_solution.ipynb b/solutions/10_gqa_solution.ipynb
index fe0c545..edd8512 100644
--- a/solutions/10_gqa_solution.ipynb
+++ b/solutions/10_gqa_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n",
     "# 🔴 Solution: Grouped Query Attention\n",
     "\n",
     "Reference solution for GQA — MHA with shared KV heads."
    ],
-   "outputs": []
+   "id": "cell-2f6b8c8449d1"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b627b5983264"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-961aabd90256"
   },
   {
    "cell_type": "code",
@@ -80,7 +82,8 @@
     "        return self.W_o(out)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3c7151bd4bda"
   },
   {
    "cell_type": "code",
@@ -90,7 +93,8 @@
     "print('Output:', gqa.forward(torch.randn(1, 4, 32)).shape)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-63491cf021fc"
   },
   {
    "cell_type": "code",
@@ -100,7 +104,8 @@
     "check('gqa')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-43fba5030854"
   }
  ]
 }
diff --git a/solutions/11_sliding_window_solution.ipynb b/solutions/11_sliding_window_solution.ipynb
index 54b77aa..395b247 100644
--- a/solutions/11_sliding_window_solution.ipynb
+++ b/solutions/11_sliding_window_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n",
     "# 🔴 Solution: Sliding Window Attention\n",
     "\n",
     "Reference solution — softmax attention with a band mask."
    ],
-   "outputs": []
+   "id": "cell-949f274c6929"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-76a1ff3fb104"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-215c73588b6d"
   },
   {
    "cell_type": "code",
@@ -65,7 +67,8 @@
     "    return torch.bmm(weights, V)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e7c1261d7cc2"
   },
   {
    "cell_type": "code",
@@ -75,7 +78,8 @@
     "print('window=0==V?', torch.allclose(sliding_window_attention(Q,K,V,0), V, atol=1e-5))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0bfe3313e0ec"
   },
   {
    "cell_type": "code",
@@ -85,7 +89,8 @@
     "check('sliding_window')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c32446d3d2ce"
   }
  ]
 }
diff --git a/solutions/12_linear_attention_solution.ipynb b/solutions/12_linear_attention_solution.ipynb
index a16ec27..d3142af 100644
--- a/solutions/12_linear_attention_solution.ipynb
+++ b/solutions/12_linear_attention_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Linear Self-Attention\n",
     "\n",
     "Reference solution — kernel-based attention with elu+1 feature map."
    ],
-   "outputs": []
+   "id": "cell-e6d1a2463446"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8d5499280f34"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import torch.nn.functional as F"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e0c65ba3811c"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "    return num / (den + 1e-6)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5de8a4fccb92"
   },
   {
    "cell_type": "code",
@@ -74,7 +77,8 @@
     "print('Shape:', linear_attention(Q,K,V).shape)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-08049c1fc1e9"
   },
   {
    "cell_type": "code",
@@ -84,7 +88,8 @@
     "check('linear_attention')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0c691bb3dda2"
   }
  ]
 }
diff --git a/solutions/13_gpt2_block_solution.ipynb b/solutions/13_gpt2_block_solution.ipynb
index ea77036..70c4e80 100644
--- a/solutions/13_gpt2_block_solution.ipynb
+++ b/solutions/13_gpt2_block_solution.ipynb
@@ -17,12 +17,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n",
     "# 🔴 Solution: GPT-2 Transformer Block\n",
     "\n",
     "Reference solution — pre-norm, causal self-attention, 4x MLP with GELU."
    ],
-   "outputs": []
+   "id": "cell-f139f1687651"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5797a9b4d794"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ccdc588b2e88"
   },
   {
    "cell_type": "code",
@@ -93,7 +95,8 @@
     "        return x"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b8f48d2d0577"
   },
   {
    "cell_type": "code",
@@ -104,7 +107,8 @@
     "print('Params:', sum(p.numel() for p in block.parameters()))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0bd03e9b2d4f"
   },
   {
    "cell_type": "code",
@@ -114,7 +118,8 @@
     "check('gpt2_block')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5f28469af751"
   }
  ]
 }
diff --git a/solutions/14_kv_cache_solution.ipynb b/solutions/14_kv_cache_solution.ipynb
index 650149d..fdcf7e0 100644
--- a/solutions/14_kv_cache_solution.ipynb
+++ b/solutions/14_kv_cache_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n",
     "# 🔴 Solution: KV Cache Attention\n",
     "\n",
     "Reference solution — multi-head attention with KV caching for autoregressive inference."
    ],
-   "outputs": []
+   "id": "cell-83ea25e0b178"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5215aed2af81"
   },
   {
    "cell_type": "code",
@@ -34,7 +35,8 @@
     "import torch.nn as nn\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-40f82d0a853e"
   },
   {
    "cell_type": "code",
@@ -84,7 +86,8 @@
     "        out = self.W_o(attn.transpose(1, 2).contiguous().view(B, S_new, -1))\n",
     "        return out, new_cache"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c538127a36f3"
   },
   {
    "cell_type": "code",
@@ -106,7 +109,8 @@
     "print('Match:', torch.allclose(full_out, inc_out, atol=1e-5))\n",
     "print('Final cache K shape:', cache[0].shape)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c96368b52bdf"
   },
   {
    "cell_type": "code",
@@ -116,7 +120,8 @@
     "from torch_judge import check\n",
     "check('kv_cache')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f62c94db448c"
   }
  ],
  "metadata": {
@@ -131,5 +136,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/15_mlp_solution.ipynb b/solutions/15_mlp_solution.ipynb
index ae3759b..1562319 100644
--- a/solutions/15_mlp_solution.ipynb
+++ b/solutions/15_mlp_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n",
     "# 🟠 Solution: SwiGLU MLP\n",
     "\n",
     "Reference solution — gated feed-forward network used in LLaMA, Mistral, and PaLM."
    ],
-   "outputs": []
+   "id": "cell-327c674b09a3"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8a670c67bd2d"
   },
   {
    "cell_type": "code",
@@ -34,7 +35,8 @@
     "import torch.nn as nn\n",
     "import torch.nn.functional as F"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-90e60fb22984"
   },
   {
    "cell_type": "code",
@@ -53,7 +55,8 @@
     "    def forward(self, x):\n",
     "        return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fa2b06c62690"
   },
   {
    "cell_type": "code",
@@ -65,7 +68,8 @@
     "print('Output:', mlp(x).shape)\n",
     "print('Params:', sum(p.numel() for p in mlp.parameters()))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0a7f6956911d"
   },
   {
    "cell_type": "code",
@@ -75,7 +79,8 @@
     "from torch_judge import check\n",
     "check('mlp')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3c2ba9d27e2a"
   }
  ],
  "metadata": {
@@ -90,5 +95,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/16_cross_entropy_solution.ipynb b/solutions/16_cross_entropy_solution.ipynb
index a8945b0..fd2fd80 100644
--- a/solutions/16_cross_entropy_solution.ipynb
+++ b/solutions/16_cross_entropy_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n",
     "# Solution: Cross-Entropy Loss\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-2fb3846edd6b"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-810fb0af01e1"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c43c9eb70ab8"
   },
   {
    "cell_type": "code",
@@ -45,7 +47,8 @@
     "    log_probs = logits - torch.logsumexp(logits, dim=-1, keepdim=True)\n",
     "    return -log_probs[torch.arange(targets.shape[0]), targets].mean()"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-165a128e570b"
   },
   {
    "cell_type": "code",
@@ -58,7 +61,8 @@
     "print('Loss:', cross_entropy_loss(logits, targets).item())\n",
     "print('Ref: ', torch.nn.functional.cross_entropy(logits, targets).item())"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ec2240ea845a"
   },
   {
    "cell_type": "code",
@@ -68,7 +72,8 @@
     "from torch_judge import check\n",
     "check('cross_entropy')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8c340ab8a582"
   }
  ],
  "metadata": {
@@ -83,5 +88,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/17_dropout_solution.ipynb b/solutions/17_dropout_solution.ipynb
index 1ce4b5d..774c200 100644
--- a/solutions/17_dropout_solution.ipynb
+++ b/solutions/17_dropout_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n",
     "# Solution: Implement Dropout\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-3c3f4a26edc9"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ffc4fae24ddd"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ca67bed3263d"
   },
   {
    "cell_type": "code",
@@ -53,7 +55,8 @@
     "        mask = (torch.rand_like(x) > self.p).float()\n",
     "        return x * mask / (1 - self.p)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0e5082346fc8"
   },
   {
    "cell_type": "code",
@@ -68,7 +71,8 @@
     "d.eval()\n",
     "print('Eval: ', d(x))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-532dae90c27a"
   },
   {
    "cell_type": "code",
@@ -78,7 +82,8 @@
     "from torch_judge import check\n",
     "check('dropout')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ed3775a0dd83"
   }
  ],
  "metadata": {
@@ -93,5 +98,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/18_embedding_solution.ipynb b/solutions/18_embedding_solution.ipynb
index d1330e0..61ae34b 100644
--- a/solutions/18_embedding_solution.ipynb
+++ b/solutions/18_embedding_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n",
     "# Solution: Embedding Layer\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-afda4068c60e"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-42988ce77d79"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3c6f7bf0ce17"
   },
   {
    "cell_type": "code",
@@ -50,7 +52,8 @@
     "    def forward(self, indices):\n",
     "        return self.weight[indices]"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a1ed06c0a969"
   },
   {
    "cell_type": "code",
@@ -63,7 +66,8 @@
     "print('Output shape:', emb(idx).shape)\n",
     "print('Matches manual:', torch.equal(emb(idx)[0], emb.weight[0]))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d2d8eec1e56d"
   },
   {
    "cell_type": "code",
@@ -73,7 +77,8 @@
     "from torch_judge import check\n",
     "check('embedding')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d8dc2ce4fddb"
   }
  ],
  "metadata": {
@@ -88,5 +93,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/19_gelu_solution.ipynb b/solutions/19_gelu_solution.ipynb
index 368e713..176e488 100644
--- a/solutions/19_gelu_solution.ipynb
+++ b/solutions/19_gelu_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n",
     "# Solution: GELU Activation\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-aedbd04a46e8"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1a0a9aa4bb13"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-9df3ad00c04d"
   },
   {
    "cell_type": "code",
@@ -45,7 +47,8 @@
     "def my_gelu(x):\n",
     "    return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-996389dd78fd"
   },
   {
    "cell_type": "code",
@@ -57,7 +60,8 @@
     "print('Output:', my_gelu(x))\n",
     "print('Ref:   ', torch.nn.functional.gelu(x))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b26eb8ba6427"
   },
   {
    "cell_type": "code",
@@ -67,7 +71,8 @@
     "from torch_judge import check\n",
     "check('gelu')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8587c88d70fe"
   }
  ],
  "metadata": {
@@ -82,5 +87,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/20_weight_init_solution.ipynb b/solutions/20_weight_init_solution.ipynb
index c8a63df..2625d64 100644
--- a/solutions/20_weight_init_solution.ipynb
+++ b/solutions/20_weight_init_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n",
     "# Solution: Kaiming Initialization\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-b51d57aa98eb"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-233cc1bc04db"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0f3acbf9d368"
   },
   {
    "cell_type": "code",
@@ -49,7 +51,8 @@
     "        weight.normal_(0, std)\n",
     "    return weight"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6a917ca0d9c3"
   },
   {
    "cell_type": "code",
@@ -63,7 +66,8 @@
     "print(f'Mean: {w.mean():.4f} (expect ~0)')\n",
     "print(f'Std:  {w.std():.4f} (expect {math.sqrt(2/512):.4f})')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-54ff04551f33"
   },
   {
    "cell_type": "code",
@@ -73,7 +77,8 @@
     "from torch_judge import check\n",
     "check('weight_init')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f5aeb33a9c33"
   }
  ],
  "metadata": {
@@ -88,5 +93,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/21_gradient_clipping_solution.ipynb b/solutions/21_gradient_clipping_solution.ipynb
index 0bfce24..1215192 100644
--- a/solutions/21_gradient_clipping_solution.ipynb
+++ b/solutions/21_gradient_clipping_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n",
     "# Solution: Gradient Norm Clipping\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-186fcd826cfb"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fee049700f77"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c5983d553399"
   },
   {
    "cell_type": "code",
@@ -50,7 +52,8 @@
     "            p.grad.mul_(clip_coef)\n",
     "    return total_norm.item()"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-64232e18a698"
   },
   {
    "cell_type": "code",
@@ -65,7 +68,8 @@
     "print('After: ', p.grad.norm().item())\n",
     "print('Returned:', orig)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-97b9bb9b160c"
   },
   {
    "cell_type": "code",
@@ -75,7 +79,8 @@
     "from torch_judge import check\n",
     "check('gradient_clipping')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e2b40dd48e5f"
   }
  ],
  "metadata": {
@@ -90,5 +95,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/22_conv2d_solution.ipynb b/solutions/22_conv2d_solution.ipynb
index 3a49c5f..02cdd33 100644
--- a/solutions/22_conv2d_solution.ipynb
+++ b/solutions/22_conv2d_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n",
     "# Solution: 2D Convolution\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-34100e03ec1d"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-aebb546ee54c"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn.functional as F"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-45eb56e75d78"
   },
   {
    "cell_type": "code",
@@ -55,7 +57,8 @@
     "        out = out + bias.view(1, -1, 1, 1)\n",
     "    return out"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-27160d106e01"
   },
   {
    "cell_type": "code",
@@ -68,7 +71,8 @@
     "print('Output:', my_conv2d(x, w).shape)\n",
     "print('Match:', torch.allclose(my_conv2d(x, w), F.conv2d(x, w), atol=1e-4))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-56d3b678d995"
   },
   {
    "cell_type": "code",
@@ -78,7 +82,8 @@
     "from torch_judge import check\n",
     "check('conv2d')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-781b011cacf3"
   }
  ],
  "metadata": {
@@ -93,5 +98,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/23_cross_attention_solution.ipynb b/solutions/23_cross_attention_solution.ipynb
index bb7cc66..f9b598a 100644
--- a/solutions/23_cross_attention_solution.ipynb
+++ b/solutions/23_cross_attention_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n",
     "# Solution: Multi-Head Cross-Attention\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-bbf14c9506bc"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-027a5a2149ee"
   },
   {
    "cell_type": "code",
@@ -34,7 +35,8 @@
     "import torch.nn as nn\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-db9e5fb63d84"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "        attn = torch.matmul(weights, v)\n",
     "        return self.W_o(attn.transpose(1, 2).contiguous().view(B, S_q, -1))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f243bbfab8d9"
   },
   {
    "cell_type": "code",
@@ -77,7 +80,8 @@
     "x_kv = torch.randn(2, 10, 64)\n",
     "print('Output:', attn(x_q, x_kv).shape)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bd8c57c0eb94"
   },
   {
    "cell_type": "code",
@@ -87,7 +91,8 @@
     "from torch_judge import check\n",
     "check('cross_attention')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-cb9690103dfb"
   }
  ],
  "metadata": {
@@ -102,5 +107,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/24_rope_solution.ipynb b/solutions/24_rope_solution.ipynb
index 02829b3..2fb01e7 100644
--- a/solutions/24_rope_solution.ipynb
+++ b/solutions/24_rope_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n",
     "# Solution: Rotary Position Embedding (RoPE)\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-c2f10e6d11ca"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f9142459ffc2"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-10f6cc867f9f"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "\n",
     "    return rotate(q), rotate(k)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-115048dd87b9"
   },
   {
    "cell_type": "code",
@@ -72,7 +75,8 @@
     "print('Shape preserved:', qr.shape == q.shape)\n",
     "print('Norm preserved:', torch.allclose(q.norm(dim=-1), qr.norm(dim=-1), atol=1e-4))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-432b8aa0e19d"
   },
   {
    "cell_type": "code",
@@ -82,7 +86,8 @@
     "from torch_judge import check\n",
     "check('rope')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a47fe29dcc3f"
   }
  ],
  "metadata": {
@@ -97,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/25_flash_attention_solution.ipynb b/solutions/25_flash_attention_solution.ipynb
index 31135d6..1e275ac 100644
--- a/solutions/25_flash_attention_solution.ipynb
+++ b/solutions/25_flash_attention_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n",
     "# Solution: Flash Attention (Tiled)\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-919746012c89"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-76cb2dc5db3b"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c06bd37bbf50"
   },
   {
    "cell_type": "code",
@@ -65,7 +67,8 @@
     "        output[:, i:i+block_size] = acc / row_sum\n",
     "    return output"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d86d1c076943"
   },
   {
    "cell_type": "code",
@@ -81,7 +84,8 @@
     "print('Shape:', out.shape)\n",
     "print('Max diff:', (out - ref).abs().max().item())"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6bad8f8fab76"
   },
   {
    "cell_type": "code",
@@ -91,7 +95,8 @@
     "from torch_judge import check\n",
     "check('flash_attention')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-751d77421b4b"
   }
  ],
  "metadata": {
@@ -106,5 +111,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/26_lora_solution.ipynb b/solutions/26_lora_solution.ipynb
index 49e1e8c..e06a610 100644
--- a/solutions/26_lora_solution.ipynb
+++ b/solutions/26_lora_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n",
     "# Solution: LoRA (Low-Rank Adaptation)\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-ff8e0e744e26"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2efd4a7e5b1f"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5cb7cfcddec4"
   },
   {
    "cell_type": "code",
@@ -55,7 +57,8 @@
     "    def forward(self, x):\n",
     "        return self.linear(x) + (x @ self.lora_A.T @ self.lora_B.T) * self.scaling"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3aff23818b29"
   },
   {
    "cell_type": "code",
@@ -70,7 +73,8 @@
     "total = sum(p.numel() for p in layer.parameters())\n",
     "print(f'Trainable: {trainable}/{total} ({100*trainable/total:.1f}%)')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-db39f1995c2e"
   },
   {
    "cell_type": "code",
@@ -80,7 +84,8 @@
     "from torch_judge import check\n",
     "check('lora')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-aa8801cd066b"
   }
  ],
  "metadata": {
@@ -95,5 +100,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/27_vit_patch_solution.ipynb b/solutions/27_vit_patch_solution.ipynb
index 73004c4..ccb38ba 100644
--- a/solutions/27_vit_patch_solution.ipynb
+++ b/solutions/27_vit_patch_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n",
     "# Solution: ViT Patch Embedding\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-1052469c40dd"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e059676064f7"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-407e7f22d20b"
   },
   {
    "cell_type": "code",
@@ -57,7 +59,8 @@
     "        x = x.permute(0, 2, 4, 1, 3, 5).reshape(B, n_h * n_w, C * p * p)\n",
     "        return self.proj(x)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-568c4e982b4e"
   },
   {
    "cell_type": "code",
@@ -70,7 +73,8 @@
     "print('Output:', pe(x).shape)\n",
     "print('Patches:', pe.num_patches)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-90513a00d6e2"
   },
   {
    "cell_type": "code",
@@ -80,7 +84,8 @@
     "from torch_judge import check\n",
     "check('vit_patch')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bd86b6064d17"
   }
  ],
  "metadata": {
@@ -95,5 +100,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/28_moe_solution.ipynb b/solutions/28_moe_solution.ipynb
index 5c1eb97..cabdbb5 100644
--- a/solutions/28_moe_solution.ipynb
+++ b/solutions/28_moe_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n",
     "# Solution: Mixture of Experts (MoE)\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-7cb38bd21dd5"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-20f305ddbf45"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c2b8322b82d7"
   },
   {
    "cell_type": "code",
@@ -70,7 +72,8 @@
     "                    output[mask] += weights[mask, k:k+1] * self.experts[e](x_flat[mask])\n",
     "        return output.reshape(orig_shape)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6c3e1b0d0c5f"
   },
   {
    "cell_type": "code",
@@ -83,7 +86,8 @@
     "print('Output:', moe(x).shape)\n",
     "print('Params:', sum(p.numel() for p in moe.parameters()))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b415161dac16"
   },
   {
    "cell_type": "code",
@@ -93,7 +97,8 @@
     "from torch_judge import check\n",
     "check('moe')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-caaff8dc98d0"
   }
  ],
  "metadata": {
@@ -108,5 +113,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/29_adam_solution.ipynb b/solutions/29_adam_solution.ipynb
index c31ae3b..18c9195 100644
--- a/solutions/29_adam_solution.ipynb
+++ b/solutions/29_adam_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n",
     "# Solution: Adam Optimizer\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-869825a8f994"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5e26e6fd8079"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f5375a3e3db7"
   },
   {
    "cell_type": "code",
@@ -68,7 +70,8 @@
     "            if p.grad is not None:\n",
     "                p.grad.zero_()"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d9ba41b1a602"
   },
   {
    "cell_type": "code",
@@ -86,7 +89,8 @@
     "    opt.zero_grad()\n",
     "    print(f'Step {i}: loss={loss.item():.4f}')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a5facef72449"
   },
   {
    "cell_type": "code",
@@ -96,7 +100,8 @@
     "from torch_judge import check\n",
     "check('adam')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bd60a84cc521"
   }
  ],
  "metadata": {
@@ -111,5 +116,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/30_cosine_lr_solution.ipynb b/solutions/30_cosine_lr_solution.ipynb
index 74924cd..5428eca 100644
--- a/solutions/30_cosine_lr_solution.ipynb
+++ b/solutions/30_cosine_lr_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n",
     "# Solution: Cosine LR Scheduler with Warmup\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-bc8e2d969538"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a84f751d833a"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b8c396649b59"
   },
   {
    "cell_type": "code",
@@ -49,7 +51,8 @@
     "    progress = (step - warmup_steps) / (total_steps - warmup_steps)\n",
     "    return min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * progress))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-43b9d284431c"
   },
   {
    "cell_type": "code",
@@ -60,7 +63,8 @@
     "lrs = [cosine_lr_schedule(i, 100, 10, 0.001) for i in range(101)]\n",
     "print(f'Start: {lrs[0]:.6f}, Warmup end: {lrs[10]:.6f}, Mid: {lrs[55]:.6f}, End: {lrs[100]:.6f}')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8c62685b9faa"
   },
   {
    "cell_type": "code",
@@ -70,7 +74,8 @@
     "from torch_judge import check\n",
     "check('cosine_lr')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d0d8797a08ee"
   }
  ],
  "metadata": {
@@ -85,5 +90,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/31_gradient_accumulation_solution.ipynb b/solutions/31_gradient_accumulation_solution.ipynb
index c289074..8cb372b 100644
--- a/solutions/31_gradient_accumulation_solution.ipynb
+++ b/solutions/31_gradient_accumulation_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n",
     "# Solution: Gradient Accumulation\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-aa4172326b18"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-775aa8fec4c2"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-9c29cf9fa41a"
   },
   {
    "cell_type": "code",
@@ -53,7 +55,8 @@
     "    optimizer.step()\n",
     "    return total_loss"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-52e5829eeaff"
   },
   {
    "cell_type": "code",
@@ -67,7 +70,8 @@
     "    [(torch.randn(2, 4), torch.randn(2, 2)) for _ in range(4)])\n",
     "print('Accumulated loss:', loss)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5745687ce4a4"
   },
   {
    "cell_type": "code",
@@ -77,7 +81,8 @@
     "from torch_judge import check\n",
     "check('gradient_accumulation')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1269a4b13577"
   }
  ],
  "metadata": {
@@ -92,5 +97,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/32_topk_sampling_solution.ipynb b/solutions/32_topk_sampling_solution.ipynb
index 31de067..3c3f6d3 100644
--- a/solutions/32_topk_sampling_solution.ipynb
+++ b/solutions/32_topk_sampling_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n",
     "# Solution: Top-k / Top-p Sampling\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-08bed1e05d70"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e626587f927a"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-138508e1d532"
   },
   {
    "cell_type": "code",
@@ -56,7 +58,8 @@
     "    probs = torch.softmax(logits, dim=-1)\n",
     "    return torch.multinomial(probs, 1).item()"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d89d26963ccf"
   },
   {
    "cell_type": "code",
@@ -68,7 +71,8 @@
     "print('top_k=1:', sample_top_k_top_p(logits.clone(), top_k=1))\n",
     "print('top_p=0.5:', sample_top_k_top_p(logits.clone(), top_p=0.5))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4d5635dc6b00"
   },
   {
    "cell_type": "code",
@@ -78,7 +82,8 @@
     "from torch_judge import check\n",
     "check('topk_sampling')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b0c1280f2d32"
   }
  ],
  "metadata": {
@@ -93,5 +98,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/33_beam_search_solution.ipynb b/solutions/33_beam_search_solution.ipynb
index 969edfa..e27d876 100644
--- a/solutions/33_beam_search_solution.ipynb
+++ b/solutions/33_beam_search_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n",
     "# Solution: Beam Search Decoding\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-1ffe13920654"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a17595909cb7"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b34e0b7c3141"
   },
   {
    "cell_type": "code",
@@ -62,7 +64,8 @@
     "    all_seqs.sort(key=lambda x: x[0], reverse=True)\n",
     "    return all_seqs[0][1]"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f4d22eb16dd7"
   },
   {
    "cell_type": "code",
@@ -77,7 +80,8 @@
     "seq = beam_search(simple_fn, start_token=0, max_len=5, beam_width=2, eos_token=4)\n",
     "print('Sequence:', seq)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-35e190973e09"
   },
   {
    "cell_type": "code",
@@ -87,7 +91,8 @@
     "from torch_judge import check\n",
     "check('beam_search')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-cd76a2ec0f70"
   }
  ],
  "metadata": {
@@ -102,5 +107,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/34_speculative_decoding_solution.ipynb b/solutions/34_speculative_decoding_solution.ipynb
index c13674a..5912325 100644
--- a/solutions/34_speculative_decoding_solution.ipynb
+++ b/solutions/34_speculative_decoding_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n",
     "# Solution: Speculative Decoding\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-d43846ee867a"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ed354910bfe6"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-73e3a3ec063e"
   },
   {
    "cell_type": "code",
@@ -60,7 +62,8 @@
     "            return accepted\n",
     "    return accepted"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-68b83cc78318"
   },
   {
    "cell_type": "code",
@@ -73,7 +76,8 @@
     "tokens = torch.tensor([2, 5, 1, 8])\n",
     "print('Perfect draft:', speculative_decode(probs, probs, tokens))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-732383d0709d"
   },
   {
    "cell_type": "code",
@@ -83,7 +87,8 @@
     "from torch_judge import check\n",
     "check('speculative_decoding')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-940e6c72e58e"
   }
  ],
  "metadata": {
@@ -98,5 +103,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/35_bpe_solution.ipynb b/solutions/35_bpe_solution.ipynb
index 1b0d1e5..4472a2d 100644
--- a/solutions/35_bpe_solution.ipynb
+++ b/solutions/35_bpe_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n",
     "# Solution: Byte-Pair Encoding (BPE)\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-bc2eec5fe687"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0073db25e40f"
   },
   {
    "cell_type": "code",
@@ -32,7 +33,8 @@
    "source": [
     "# No imports needed"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-dd99ec77f8a0"
   },
   {
    "cell_type": "code",
@@ -89,7 +91,8 @@
     "            all_tokens.extend(symbols)\n",
     "        return all_tokens"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-dc18695aafd1"
   },
   {
    "cell_type": "code",
@@ -102,7 +105,8 @@
     "print('Merges:', bpe.merges)\n",
     "print('Encode:', bpe.encode('low lower newest'))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d80bd6a1913e"
   },
   {
    "cell_type": "code",
@@ -112,7 +116,8 @@
     "from torch_judge import check\n",
     "check('bpe')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e3a37684c4f6"
   }
  ],
  "metadata": {
@@ -127,5 +132,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/36_int8_quantization_solution.ipynb b/solutions/36_int8_quantization_solution.ipynb
index 5a5e3ec..6b19ca3 100644
--- a/solutions/36_int8_quantization_solution.ipynb
+++ b/solutions/36_int8_quantization_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n",
     "# Solution: INT8 Quantized Linear\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-c3d51ae293a7"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3b7204d787e7"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-addc5b1d20ea"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "            out = out + self.bias\n",
     "        return out"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6c065dfe4d16"
   },
   {
    "cell_type": "code",
@@ -72,7 +75,8 @@
     "print('Weight dtype:', q.weight_int8.dtype)\n",
     "print('Compression: float32 -> int8 = 4x')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1301564ac769"
   },
   {
    "cell_type": "code",
@@ -82,7 +86,8 @@
     "from torch_judge import check\n",
     "check('int8_quantization')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-731a5c4dcd89"
   }
  ],
  "metadata": {
@@ -97,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/37_dpo_loss_solution.ipynb b/solutions/37_dpo_loss_solution.ipynb
index 0606b84..ca8edfc 100644
--- a/solutions/37_dpo_loss_solution.ipynb
+++ b/solutions/37_dpo_loss_solution.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n",
     "# Solution: DPO (Direct Preference Optimization) Loss\n",
     "\n",
     "Reference solution."
    ],
-   "outputs": []
+   "id": "cell-7143facb4472"
   },
   {
    "cell_type": "code",
@@ -23,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f1b8f6b079d6"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "import torch\n",
     "import torch.nn.functional as F"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2e84e0de7215"
   },
   {
    "cell_type": "code",
@@ -48,7 +50,8 @@
     "    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)\n",
     "    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-539ed9540d9b"
   },
   {
    "cell_type": "code",
@@ -62,7 +65,8 @@
     "ref_r = torch.tensor([-1.0, -1.0])\n",
     "print('Loss:', dpo_loss(chosen, rejected, ref_c, ref_r, beta=0.1).item())"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1652483434b7"
   },
   {
    "cell_type": "code",
@@ -72,7 +76,8 @@
     "from torch_judge import check\n",
     "check('dpo_loss')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ef88c30f1c7a"
   }
  ],
  "metadata": {
@@ -87,5 +92,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/solutions/38_grpo_loss_solution.ipynb b/solutions/38_grpo_loss_solution.ipynb
index 05ce2f6..6910cad 100644
--- a/solutions/38_grpo_loss_solution.ipynb
+++ b/solutions/38_grpo_loss_solution.ipynb
@@ -4,11 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n",
     "# Solution: GRPO (Group Relative Policy Optimization) Loss\n",
     "\n",
     "Reference solution."
-   ]
+   ],
+   "id": "cell-c3c37a3f40b5"
   },
   {
    "cell_type": "code",
@@ -22,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5291620c23dc"
   },
   {
    "cell_type": "code",
@@ -33,7 +35,8 @@
     "import torch\n",
     "import torch.nn.functional as F\n",
     "from torch import Tensor"
-   ]
+   ],
+   "id": "cell-13d027881df4"
   },
   {
    "cell_type": "code",
@@ -67,7 +70,8 @@
     "\n",
     "    # GRPO objective: -E[A_i * logpi_i]\n",
     "    return -(advantages_detached * logps).mean()\n"
-   ]
+   ],
+   "id": "cell-d878fd547e7a"
   },
   {
    "cell_type": "code",
@@ -80,7 +84,8 @@
     "rewards = torch.tensor([1.0, 0.8, 0.2, 0.0])\n",
     "group_ids = torch.tensor([0, 0, 1, 1])\n",
     "print('Loss:', grpo_loss(logps, rewards, group_ids).item())"
-   ]
+   ],
+   "id": "cell-289d9d048e3c"
   },
   {
    "cell_type": "code",
@@ -90,7 +95,8 @@
    "source": [
     "from torch_judge import check\n",
     "check('grpo_loss')"
-   ]
+   ],
+   "id": "cell-d696b1c08d9b"
   }
  ],
  "metadata": {
diff --git a/solutions/39_ppo_loss_solution.ipynb b/solutions/39_ppo_loss_solution.ipynb
index 1818a32..ec7765c 100644
--- a/solutions/39_ppo_loss_solution.ipynb
+++ b/solutions/39_ppo_loss_solution.ipynb
@@ -4,11 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n",
     "# Solution: PPO Clipped Loss\n",
     "\n",
     "Reference solution for the PPO clipped surrogate loss task.\n"
-   ]
+   ],
+   "id": "cell-73977ce3fc1b"
   },
   {
    "cell_type": "code",
@@ -22,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-21f4c0dae2bc"
   },
   {
    "cell_type": "code",
@@ -33,7 +35,8 @@
     "import torch\n",
     "import torch.nn.functional as F\n",
     "from torch import Tensor\n"
-   ]
+   ],
+   "id": "cell-958e8ccd84ee"
   },
   {
    "cell_type": "code",
@@ -65,7 +68,8 @@
     "\n",
     "    # PPO objective: negative mean of the more conservative objective\n",
     "    return -torch.min(unclipped, clipped).mean()\n"
-   ]
+   ],
+   "id": "cell-29d7618b2b78"
   },
   {
    "cell_type": "code",
@@ -78,7 +82,8 @@
     "old_logps = torch.tensor([0.0, -0.1, -0.5, -0.5])\n",
     "advantages = torch.tensor([1.0, -1.0, 0.5, -0.5])\n",
     "print('Loss:', ppo_loss(new_logps, old_logps, advantages, clip_ratio=0.2))\n"
-   ]
+   ],
+   "id": "cell-c46aa82b9f10"
   },
   {
    "cell_type": "code",
@@ -88,7 +93,8 @@
    "source": [
     "from torch_judge import check\n",
     "check('ppo_loss')\n"
-   ]
+   ],
+   "id": "cell-92aced739aa3"
   }
  ],
  "metadata": {
diff --git a/solutions/40_linear_regression_solution.ipynb b/solutions/40_linear_regression_solution.ipynb
index a745293..bcf1e04 100644
--- a/solutions/40_linear_regression_solution.ipynb
+++ b/solutions/40_linear_regression_solution.ipynb
@@ -4,11 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n",
     "# 🟡 Solution: Linear Regression\n",
     "\n",
     "Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches."
-   ]
+   ],
+   "id": "cell-15853200c649"
   },
   {
    "cell_type": "code",
@@ -22,7 +23,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8a24230361ae"
   },
   {
    "cell_type": "code",
@@ -32,7 +34,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-08d16fb79bde"
   },
   {
    "cell_type": "code",
@@ -89,7 +92,8 @@
     "        b = layer.bias.data.squeeze(0)    # scalar ()\n",
     "        return w, b"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-001be78ef74a"
   },
   {
    "cell_type": "code",
@@ -110,7 +114,8 @@
     "    print(f\"{name:13s}  w={w.tolist()}  b={b.item():.4f}\")\n",
     "print(f\"{'True':13s}  w={true_w.tolist()}  b=3.0000\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-904aa56cf10b"
   },
   {
    "cell_type": "code",
@@ -121,7 +126,8 @@
     "from torch_judge import check\n",
     "check(\"linear_regression\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4b31f4b8ca0a"
   }
  ],
  "metadata": {
@@ -136,5 +142,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/00_welcome.ipynb b/templates/00_welcome.ipynb
index c3498f5..868418c 100644
--- a/templates/00_welcome.ipynb
+++ b/templates/00_welcome.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/00_welcome.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/00_welcome.ipynb)\n",
     "\n",
     "# 🔥 TorchCode — PyTorch Operator Practice\n",
     "\n",
@@ -33,8 +33,9 @@
     "\n",
     "## Quick Start\n",
     "\n",
-    "📖 **Reference solutions in Colab**: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) — Start with ReLU. Or use the **Colab** links in the table below for each solution."
-   ]
+    "📖 **Reference solutions in Colab**: [Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) — Start with ReLU. Or use the **Colab** links in the table below for each solution."
+   ],
+   "id": "cell-d30023ba4676"
   },
   {
    "cell_type": "code",
@@ -48,7 +49,8 @@
     "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
     "except ImportError:\n",
     "    pass\n"
-   ]
+   ],
+   "id": "cell-7abf370eea8e"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
    "source": [
     "from torch_judge import status\n",
     "status()"
-   ]
+   ],
+   "id": "cell-cd409a5c25d0"
   },
   {
    "cell_type": "markdown",
@@ -145,7 +148,8 @@
     "check(\"relu\")              # Judge your implementation\n",
     "hint(\"causal_attention\")   # Get a hint\n",
     "```"
-   ]
+   ],
+   "id": "cell-5f2cffdcaf3d"
   }
  ],
  "metadata": {
@@ -160,5 +164,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/01_relu.ipynb b/templates/01_relu.ipynb
index abe82e0..d1f61d7 100644
--- a/templates/01_relu.ipynb
+++ b/templates/01_relu.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb)\n",
     "\n",
     "# 🟢 Easy: Implement ReLU\n",
     "\n",
@@ -28,7 +28,7 @@
     "Output: tensor([ 0.,  0., 0., 1., 2.])\n",
     "```"
    ],
-   "outputs": []
+   "id": "cell-488e24a3b562"
   },
   {
    "cell_type": "code",
@@ -42,7 +42,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2115a17fb167"
   },
   {
    "cell_type": "code",
@@ -51,7 +52,8 @@
     "import torch"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1846b083bfa6"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     ""
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8dbfa8b0061b"
   },
   {
    "cell_type": "code",
@@ -77,7 +80,8 @@
     "print(\"Shape: \", relu(x).shape)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ebe90dc3c828"
   },
   {
    "cell_type": "code",
@@ -88,7 +92,8 @@
     "check(\"relu\")"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fc4b8825c5ee"
   }
  ],
  "metadata": {
@@ -111,5 +116,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/02_softmax.ipynb b/templates/02_softmax.ipynb
index b8ed6f8..5170d95 100644
--- a/templates/02_softmax.ipynb
+++ b/templates/02_softmax.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb)\n",
     "\n",
     "# 🟢 Easy: Implement Softmax\n",
     "\n",
@@ -28,7 +28,7 @@
     "Output: tensor([0.0900, 0.2447, 0.6652])  # sums to 1.0\n",
     "```"
    ],
-   "outputs": []
+   "id": "cell-ee62895f3be1"
   },
   {
    "cell_type": "code",
@@ -42,7 +42,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bc1c7e2c95a0"
   },
   {
    "cell_type": "code",
@@ -51,7 +52,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5b11467cc2d8"
   },
   {
    "cell_type": "code",
@@ -63,7 +65,8 @@
     "def my_softmax(x: torch.Tensor, dim: int = -1) -> torch.Tensor:\n",
     "    pass  # Replace this"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-235a94a146f9"
   },
   {
    "cell_type": "code",
@@ -76,7 +79,8 @@
     "print(\"Sum:   \", my_softmax(x, dim=-1).sum())  # should be ~1.0\n",
     "print(\"Ref:   \", torch.softmax(x, dim=-1))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1c9f8f50df2b"
   },
   {
    "cell_type": "code",
@@ -87,7 +91,8 @@
     "from torch_judge import check\n",
     "check(\"softmax\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4da14d5295f6"
   }
  ],
  "metadata": {
@@ -102,5 +107,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/03_linear.ipynb b/templates/03_linear.ipynb
index 29e4664..8005128 100644
--- a/templates/03_linear.ipynb
+++ b/templates/03_linear.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb)\n",
     "\n",
     "# 🟡 Medium: Simple Linear Layer\n",
     "\n",
@@ -24,7 +24,7 @@
     "- `forward(x)` computes `x @ W^T + b`\n",
     "- Do **NOT** use `torch.nn.Linear`"
    ],
-   "outputs": []
+   "id": "cell-11eb928f56bb"
   },
   {
    "cell_type": "code",
@@ -38,7 +38,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-78f5c77ff8e0"
   },
   {
    "cell_type": "code",
@@ -48,7 +49,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6cc01b59b785"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
     "        pass  # Compute y = x @ W^T + b"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d8adc3a9e492"
   },
   {
    "cell_type": "code",
@@ -80,7 +83,8 @@
     "y = layer.forward(x)\n",
     "print(\"Output shape:\", y.shape)        # should be (2, 4)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f2c84b5f5b6a"
   },
   {
    "cell_type": "code",
@@ -91,7 +95,8 @@
     "from torch_judge import check\n",
     "check(\"linear\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-31cc2455bc19"
   }
  ],
  "metadata": {
@@ -106,5 +111,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/04_layernorm.ipynb b/templates/04_layernorm.ipynb
index 89f030b..dc35b41 100644
--- a/templates/04_layernorm.ipynb
+++ b/templates/04_layernorm.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb)\n",
     "\n",
     "# 🟡 Medium: Implement LayerNorm\n",
     "\n",
@@ -30,7 +30,7 @@
     "- Normalize over the last dimension only\n",
     "- Must support autograd"
    ],
-   "outputs": []
+   "id": "cell-13dd55cba342"
   },
   {
    "cell_type": "code",
@@ -44,7 +44,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-22f090de81a2"
   },
   {
    "cell_type": "code",
@@ -53,7 +54,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-333ba7d0eadb"
   },
   {
    "cell_type": "code",
@@ -65,7 +67,8 @@
     "def my_layer_norm(x, gamma, beta, eps=1e-5):\n",
     "    pass  # Replace this"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-591fa086ab56"
   },
   {
    "cell_type": "code",
@@ -84,7 +87,8 @@
     "print(\"Your output std: \", out.std(dim=-1))     # should be ~1\n",
     "print(\"Match ref?      \", torch.allclose(out, ref, atol=1e-4))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6ee009e37bd9"
   },
   {
    "cell_type": "code",
@@ -95,7 +99,8 @@
     "from torch_judge import check\n",
     "check(\"layernorm\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e2a35caadafb"
   }
  ],
  "metadata": {
@@ -110,5 +115,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/05_attention.ipynb b/templates/05_attention.ipynb
index 047243e..f0a50a1 100644
--- a/templates/05_attention.ipynb
+++ b/templates/05_attention.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb)\n",
     "\n",
     "# 🔴 Hard: Softmax Attention\n",
     "\n",
@@ -27,7 +27,8 @@
     "- You **may** use `torch.softmax` and `torch.bmm`\n",
     "- Must support autograd\n",
     "- Must handle cross-attention (seq_q ≠ seq_k)"
-   ]
+   ],
+   "id": "cell-8a67b627466a"
   },
   {
    "cell_type": "code",
@@ -41,7 +42,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bd03e7868e64"
   },
   {
    "cell_type": "code",
@@ -51,7 +53,8 @@
    "source": [
     "import torch\n",
     "import math"
-   ]
+   ],
+   "id": "cell-7acc2da3c658"
   },
   {
    "cell_type": "code",
@@ -63,7 +66,8 @@
     "\n",
     "def scaled_dot_product_attention(Q, K, V):\n",
     "    pass  # Replace this"
-   ]
+   ],
+   "id": "cell-8b9cfaebfaee"
   },
   {
    "cell_type": "code",
@@ -88,7 +92,8 @@
     "V2 = torch.randn(1, 5, 32)\n",
     "out2 = scaled_dot_product_attention(Q2, K2, V2)\n",
     "print(\"Cross-attn shape:\", out2.shape)     # should be (1, 3, 32)"
-   ]
+   ],
+   "id": "cell-7f5ed83e3b5a"
   },
   {
    "cell_type": "code",
@@ -99,7 +104,8 @@
     "# ✅ SUBMIT\n",
     "from torch_judge import check\n",
     "check(\"attention\")"
-   ]
+   ],
+   "id": "cell-91157d972f57"
   }
  ],
  "metadata": {
@@ -114,5 +120,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/06_multihead_attention.ipynb b/templates/06_multihead_attention.ipynb
index 7812714..4ba8334 100644
--- a/templates/06_multihead_attention.ipynb
+++ b/templates/06_multihead_attention.ipynb
@@ -5,7 +5,7 @@
    "id": "2bfeea64",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb)\n",
     "\n",
     "# 🔴 Hard: Multi-Head Attention\n",
     "\n",
@@ -78,7 +78,8 @@
     "\n",
     "    def forward(self, Q, K, V):\n",
     "        pass  # Implement multi-head attention"
-   ]
+   ],
+   "id": "cell-3a07876c936c"
   },
   {
    "cell_type": "code",
@@ -102,7 +103,8 @@
     "V = torch.randn(1, 7, 32)\n",
     "out2 = mha.forward(Q, K, V)\n",
     "print(\"Cross-attn shape:\", out2.shape)     # (1, 3, 32)"
-   ]
+   ],
+   "id": "cell-c902eedf2923"
   },
   {
    "cell_type": "code",
@@ -113,7 +115,8 @@
     "# ✅ SUBMIT\n",
     "from torch_judge import check\n",
     "check(\"mha\")"
-   ]
+   ],
+   "id": "cell-40d9694d03c5"
   }
  ],
  "metadata": {
diff --git a/templates/07_batchnorm.ipynb b/templates/07_batchnorm.ipynb
index 6199e93..4f1645c 100644
--- a/templates/07_batchnorm.ipynb
+++ b/templates/07_batchnorm.ipynb
@@ -5,7 +5,7 @@
    "id": "89fd15cb",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
     "\n",
     "# 🟡 Medium: Implement BatchNorm\n",
     "\n",
@@ -55,7 +55,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a1d5510567a2"
   },
   {
    "cell_type": "code",
@@ -64,7 +65,8 @@
    "outputs": [],
    "source": [
     "import torch"
-   ]
+   ],
+   "id": "cell-b96e8321fb29"
   },
   {
    "cell_type": "code",
@@ -126,7 +128,8 @@
     "# ✅ SUBMIT\n",
     "from torch_judge import check\n",
     "check(\"batchnorm\")"
-   ]
+   ],
+   "id": "cell-8776a5449c11"
   }
  ],
  "metadata": {
diff --git a/templates/08_rmsnorm.ipynb b/templates/08_rmsnorm.ipynb
index facfbf5..44bb03e 100644
--- a/templates/08_rmsnorm.ipynb
+++ b/templates/08_rmsnorm.ipynb
@@ -17,7 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb)\n",
     "\n",
     "# 🟡 Medium: Implement RMSNorm\n",
     "\n",
@@ -36,7 +36,7 @@
     "- Normalize over `dim=-1`\n",
     "- Must support autograd"
    ],
-   "outputs": []
+   "id": "cell-e12e535faa79"
   },
   {
    "cell_type": "code",
@@ -50,7 +50,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2c55ef9c078d"
   },
   {
    "cell_type": "code",
@@ -59,7 +60,8 @@
     "import torch"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-68c3e79b21f4"
   },
   {
    "cell_type": "code",
@@ -71,7 +73,8 @@
     "    pass  # Replace this"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e007d3472d17"
   },
   {
    "cell_type": "code",
@@ -85,7 +88,8 @@
     "print(\"RMS of output:\", out.pow(2).mean(dim=-1).sqrt())  # should be ~1"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-96e430eb39cd"
   },
   {
    "cell_type": "code",
@@ -95,7 +99,8 @@
     "check('rmsnorm')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8c428d84e93c"
   }
  ]
 }
diff --git a/templates/09_causal_attention.ipynb b/templates/09_causal_attention.ipynb
index 24bdb6c..62bb162 100644
--- a/templates/09_causal_attention.ipynb
+++ b/templates/09_causal_attention.ipynb
@@ -17,28 +17,31 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb)\n",
     "\n",
-    "# 🔴 Hard: Causal Self-Attention\n",
+    "# Hard: Causal Self-Attention\n",
     "\n",
-    "Implement **causal (masked) self-attention** — the attention used in GPT-style decoders.\n",
+    "Implement **causal (masked) self-attention** - the attention used in GPT-style decoders.\n",
     "\n",
     "Same as softmax attention, but each position can **only attend to itself and earlier positions** (no peeking at future tokens).\n",
     "\n",
-    "$$\\text{scores}_{ij} = \\begin{cases} \\frac{Q_i \\cdot K_j}{\\sqrt{d_k}} & \\text{if } j \\le i \\\\ -\\infty & \\text{if } j > i \\end{cases}$$\n",
+    "For each query position `i` and key position `j`:\n",
+    "\n",
+    "- If `j <= i`, use the scaled dot product score `Q_i dot K_j / sqrt(d_k)`.\n",
+    "- If `j > i`, mask the score to `-inf` before softmax.\n",
     "\n",
     "### Signature\n",
     "```python\n",
     "def causal_attention(Q, K, V):\n",
-    "    # Q, K, V: (batch, seq, d) → output: (batch, seq, d_v)\n",
+    "    # Q, K, V: (batch, seq, d) -> output: (batch, seq, d_v)\n",
     "```\n",
     "\n",
     "### Rules\n",
     "- Do **NOT** use `F.scaled_dot_product_attention`\n",
-    "- Position $i$ can only attend to positions $\\le i$\n",
+    "- Position `i` can only attend to positions `<= i`\n",
     "- You **may** use `torch.softmax`, `torch.bmm`, `torch.triu`"
    ],
-   "outputs": []
+   "id": "cell-7d839747fec3"
   },
   {
    "cell_type": "code",
@@ -52,7 +55,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-dc437a6bbd84"
   },
   {
    "cell_type": "code",
@@ -62,25 +66,27 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2d1e83527d76"
   },
   {
    "cell_type": "code",
    "metadata": {},
    "source": [
-    "# ✏️ YOUR IMPLEMENTATION HERE\n",
+    "# YOUR IMPLEMENTATION HERE\n",
     "\n",
     "def causal_attention(Q, K, V):\n",
     "    pass  # Replace this"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a759e033e85f"
   },
   {
    "cell_type": "code",
    "metadata": {},
    "source": [
-    "# 🧪 Debug\n",
+    "# Debug\n",
     "torch.manual_seed(0)\n",
     "Q = torch.randn(1, 4, 8)\n",
     "K = torch.randn(1, 4, 8)\n",
@@ -90,7 +96,8 @@
     "print(\"Pos 0 == V[0]?\", torch.allclose(out[:, 0], V[:, 0], atol=1e-5))  # should be True"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-dce8bccb8829"
   },
   {
    "cell_type": "code",
@@ -100,7 +107,8 @@
     "check('causal_attention')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-567f615926ef"
   }
  ]
 }
diff --git a/templates/10_gqa.ipynb b/templates/10_gqa.ipynb
index 92d544d..1d38b2f 100644
--- a/templates/10_gqa.ipynb
+++ b/templates/10_gqa.ipynb
@@ -17,7 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb)\n",
     "\n",
     "# 🔴 Hard: Grouped Query Attention (GQA)\n",
     "\n",
@@ -41,7 +41,7 @@
     "- Expand KV heads with `repeat_interleave` to match Q heads\n",
     "- When `num_kv_heads == num_heads`, should behave like standard MHA"
    ],
-   "outputs": []
+   "id": "cell-7bb0949ecab9"
   },
   {
    "cell_type": "code",
@@ -55,7 +55,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a9f438b80c19"
   },
   {
    "cell_type": "code",
@@ -66,7 +67,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-72546cc3829f"
   },
   {
    "cell_type": "code",
@@ -82,7 +84,8 @@
     "        pass  # Self-attention with grouped KV"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-afe0f28b92d2"
   },
   {
    "cell_type": "code",
@@ -99,7 +102,8 @@
     "print(\"Output shape:\", out.shape)           # (2, 6, 32)"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d6bc0a4fd2b2"
   },
   {
    "cell_type": "code",
@@ -109,7 +113,8 @@
     "check('gqa')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fcded4c20296"
   }
  ]
 }
diff --git a/templates/11_sliding_window.ipynb b/templates/11_sliding_window.ipynb
index d201fb1..b9a014f 100644
--- a/templates/11_sliding_window.ipynb
+++ b/templates/11_sliding_window.ipynb
@@ -17,7 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb)\n",
     "\n",
     "# 🔴 Hard: Sliding Window Attention\n",
     "\n",
@@ -38,7 +38,7 @@
     "- `window_size=0`: only self — output should equal V\n",
     "- `window_size >= seq_len`: equivalent to full attention"
    ],
-   "outputs": []
+   "id": "cell-e32a0584b338"
   },
   {
    "cell_type": "code",
@@ -52,7 +52,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-22f3b91fb2fb"
   },
   {
    "cell_type": "code",
@@ -62,7 +63,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-57792ca776ad"
   },
   {
    "cell_type": "code",
@@ -74,7 +76,8 @@
     "    pass  # Replace this"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-7a409749a915"
   },
   {
    "cell_type": "code",
@@ -93,7 +96,8 @@
     "print(\"window=0 == V?\", torch.allclose(out0, V, atol=1e-5))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-58a9801f9754"
   },
   {
    "cell_type": "code",
@@ -103,7 +107,8 @@
     "check('sliding_window')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b254d78dbd28"
   }
  ]
 }
diff --git a/templates/12_linear_attention.ipynb b/templates/12_linear_attention.ipynb
index 7d6ceeb..4bbb5f2 100644
--- a/templates/12_linear_attention.ipynb
+++ b/templates/12_linear_attention.ipynb
@@ -17,7 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb)\n",
     "\n",
     "# 🔴 Hard: Linear Self-Attention\n",
     "\n",
@@ -45,7 +45,7 @@
     "- Must be O(S·D²) — should run fast on long sequences\n",
     "- You **may** use `F.elu`"
    ],
-   "outputs": []
+   "id": "cell-198932b02ac5"
   },
   {
    "cell_type": "code",
@@ -59,7 +59,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6c7a263ebc92"
   },
   {
    "cell_type": "code",
@@ -69,7 +70,8 @@
     "import torch.nn.functional as F"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5d3dbb703c26"
   },
   {
    "cell_type": "code",
@@ -81,7 +83,8 @@
     "    pass  # Replace this"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5758c217d64f"
   },
   {
    "cell_type": "code",
@@ -96,7 +99,8 @@
     "print(\"Has NaN?\", torch.isnan(out).any().item())"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-94d00c576a69"
   },
   {
    "cell_type": "code",
@@ -106,7 +110,8 @@
     "check('linear_attention')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-56e3495be182"
   }
  ]
 }
diff --git a/templates/13_gpt2_block.ipynb b/templates/13_gpt2_block.ipynb
index 3211781..afba13d 100644
--- a/templates/13_gpt2_block.ipynb
+++ b/templates/13_gpt2_block.ipynb
@@ -17,7 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb)\n",
     "\n",
     "# 🔴 Hard: GPT-2 Transformer Block\n",
     "\n",
@@ -45,7 +45,7 @@
     "- Pre-norm architecture (LayerNorm *before* attention and MLP)\n",
     "- Residual connections around both attention and MLP"
    ],
-   "outputs": []
+   "id": "cell-d5cd40266298"
   },
   {
    "cell_type": "code",
@@ -59,7 +59,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-24b4bc91322b"
   },
   {
    "cell_type": "code",
@@ -70,7 +71,8 @@
     "import math"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2ba29d7983a8"
   },
   {
    "cell_type": "code",
@@ -87,7 +89,8 @@
     "        pass  # Pre-norm + causal attention + MLP with residuals"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ca7018184d17"
   },
   {
    "cell_type": "code",
@@ -103,7 +106,8 @@
     "print(\"Params:\", sum(p.numel() for p in block.parameters()))"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-da4eea3e58b3"
   },
   {
    "cell_type": "code",
@@ -113,7 +117,8 @@
     "check('gpt2_block')"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-111f4d94ae43"
   }
  ]
 }
diff --git a/templates/14_kv_cache.ipynb b/templates/14_kv_cache.ipynb
index a7d9e0b..b690c16 100644
--- a/templates/14_kv_cache.ipynb
+++ b/templates/14_kv_cache.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb)\n",
     "\n",
     "# 🔴 Hard: KV Cache Attention\n",
     "\n",
@@ -37,7 +37,7 @@
     "Decode:   [t5]           → Q=t5, K/V=cache+t5  → cache = (K_{0:5}, V_{0:5})\n",
     "```"
    ],
-   "outputs": []
+   "id": "cell-b5d61e9b5c17"
   },
   {
    "cell_type": "code",
@@ -51,7 +51,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1fceb50c7e24"
   },
   {
    "cell_type": "code",
@@ -62,7 +63,8 @@
     "import torch.nn as nn\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6554367058cc"
   },
   {
    "cell_type": "code",
@@ -84,7 +86,8 @@
     "        # 5. Return (output, (K_all, V_all))\n",
     "        pass"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d3e7878409d1"
   },
   {
    "cell_type": "code",
@@ -108,7 +111,8 @@
     "inc_out = torch.cat([out1, out2, out3], dim=1)\n",
     "print(\"Match:\", torch.allclose(full_out, inc_out, atol=1e-5))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-14bfb74b0635"
   },
   {
    "cell_type": "code",
@@ -119,7 +123,8 @@
     "from torch_judge import check\n",
     "check('kv_cache')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bf82c01bbc10"
   }
  ],
  "metadata": {
@@ -134,5 +139,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/15_mlp.ipynb b/templates/15_mlp.ipynb
index 0d238fa..ac6d6a9 100644
--- a/templates/15_mlp.ipynb
+++ b/templates/15_mlp.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb)\n",
     "\n",
     "# 🟠 Medium: SwiGLU MLP\n",
     "\n",
@@ -33,7 +33,7 @@
     "the gate projection controls information flow, while the up projection provides the content.\n",
     "This consistently outperforms standard FFNs in practice (PaLM, LLaMA, Mistral all use it)."
    ],
-   "outputs": []
+   "id": "cell-55d5c66de1fd"
   },
   {
    "cell_type": "code",
@@ -47,7 +47,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-7018cc74a7c2"
   },
   {
    "cell_type": "code",
@@ -58,7 +59,8 @@
     "import torch.nn as nn\n",
     "import torch.nn.functional as F"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1c01e6c658f2"
   },
   {
    "cell_type": "code",
@@ -75,7 +77,8 @@
     "    def forward(self, x):\n",
     "        pass  # down_proj(silu(gate_proj(x)) * up_proj(x))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c227aaa7f515"
   },
   {
    "cell_type": "code",
@@ -89,7 +92,8 @@
     "print(\"Output shape:\", out.shape)  # (2, 8, 64)\n",
     "print(\"Params:\", sum(p.numel() for p in mlp.parameters()))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4b208f4bd01a"
   },
   {
    "cell_type": "code",
@@ -100,7 +104,8 @@
     "from torch_judge import check\n",
     "check('mlp')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-af016ed9778b"
   }
  ],
  "metadata": {
@@ -115,5 +120,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/16_cross_entropy.ipynb b/templates/16_cross_entropy.ipynb
index 80b7765..5075d8f 100644
--- a/templates/16_cross_entropy.ipynb
+++ b/templates/16_cross_entropy.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb)\n",
     "\n",
     "# 🟢 Easy: Cross-Entropy Loss\n",
     "\n",
@@ -23,7 +23,7 @@
     "- Do NOT use `F.cross_entropy` or `nn.CrossEntropyLoss`\n",
     "- Must be numerically stable (use logsumexp trick)"
    ],
-   "outputs": []
+   "id": "cell-c375e29731a2"
   },
   {
    "cell_type": "code",
@@ -37,7 +37,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e1170630e835"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-cfdc219c1b59"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "def cross_entropy_loss(logits, targets):\n",
     "    pass  # log_probs = logits - logsumexp(...)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-328aafbb75ac"
   },
   {
    "cell_type": "code",
@@ -71,7 +74,8 @@
     "print('Loss:', cross_entropy_loss(logits, targets))\n",
     "print('Ref: ', torch.nn.functional.cross_entropy(logits, targets))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-701f0b3f1410"
   },
   {
    "cell_type": "code",
@@ -82,7 +86,8 @@
     "from torch_judge import check\n",
     "check('cross_entropy')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f02326662761"
   }
  ],
  "metadata": {
@@ -97,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/17_dropout.ipynb b/templates/17_dropout.ipynb
index d2ec346..2bbe358 100644
--- a/templates/17_dropout.ipynb
+++ b/templates/17_dropout.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb)\n",
     "\n",
     "# 🟢 Easy: Implement Dropout\n",
     "\n",
@@ -22,7 +22,7 @@
     "- During **eval**: return input unchanged (identity)\n",
     "- Do NOT use `nn.Dropout` or `F.dropout`"
    ],
-   "outputs": []
+   "id": "cell-b195fa80956b"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-44ea058d38c9"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f94f37ecbb19"
   },
   {
    "cell_type": "code",
@@ -63,7 +65,8 @@
     "    def forward(self, x):\n",
     "        pass"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-856839f0607b"
   },
   {
    "cell_type": "code",
@@ -78,7 +81,8 @@
     "d.eval()\n",
     "print('Eval: ', d(x))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-29f86a714b53"
   },
   {
    "cell_type": "code",
@@ -89,7 +93,8 @@
     "from torch_judge import check\n",
     "check('dropout')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-be9c17b0787b"
   }
  ],
  "metadata": {
@@ -104,5 +109,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/18_embedding.ipynb b/templates/18_embedding.ipynb
index 1dc318c..ad3b088 100644
--- a/templates/18_embedding.ipynb
+++ b/templates/18_embedding.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb)\n",
     "\n",
     "# 🟢 Easy: Embedding Layer\n",
     "\n",
@@ -22,7 +22,7 @@
     "- Forward: index into weight matrix — `weight[indices]`\n",
     "- Do NOT use `nn.Embedding`"
    ],
-   "outputs": []
+   "id": "cell-2a2807c82e7f"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-875f47d08249"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e312e347e7aa"
   },
   {
    "cell_type": "code",
@@ -63,7 +65,8 @@
     "    def forward(self, indices):\n",
     "        pass"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-22589eb5a20c"
   },
   {
    "cell_type": "code",
@@ -76,7 +79,8 @@
     "print('Output shape:', emb(idx).shape)\n",
     "print('Matches manual:', torch.equal(emb(idx)[0], emb.weight[0]))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-687ee2396cd4"
   },
   {
    "cell_type": "code",
@@ -87,7 +91,8 @@
     "from torch_judge import check\n",
     "check('embedding')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a5b5d989b3e5"
   }
  ],
  "metadata": {
@@ -102,5 +107,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/19_gelu.ipynb b/templates/19_gelu.ipynb
index 613c65c..bc79535 100644
--- a/templates/19_gelu.ipynb
+++ b/templates/19_gelu.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb)\n",
     "\n",
     "# 🟢 Easy: GELU Activation\n",
     "\n",
@@ -21,7 +21,7 @@
     "- Do NOT use `F.gelu`, `nn.GELU`, or `torch.nn.functional.gelu`\n",
     "- Use `torch.erf` for the exact version"
    ],
-   "outputs": []
+   "id": "cell-4f5fb89c2573"
   },
   {
    "cell_type": "code",
@@ -35,7 +35,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fd66bf0d9d1f"
   },
   {
    "cell_type": "code",
@@ -45,7 +46,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f8d468af7734"
   },
   {
    "cell_type": "code",
@@ -57,7 +59,8 @@
     "def my_gelu(x):\n",
     "    pass"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b5fe89008e24"
   },
   {
    "cell_type": "code",
@@ -69,7 +72,8 @@
     "print('Output:', my_gelu(x))\n",
     "print('Ref:   ', torch.nn.functional.gelu(x))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8862265adb2d"
   },
   {
    "cell_type": "code",
@@ -80,7 +84,8 @@
     "from torch_judge import check\n",
     "check('gelu')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6764a5dea69a"
   }
  ],
  "metadata": {
@@ -95,5 +100,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/20_weight_init.ipynb b/templates/20_weight_init.ipynb
index 2a35343..6470bab 100644
--- a/templates/20_weight_init.ipynb
+++ b/templates/20_weight_init.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb)\n",
     "\n",
     "# 🟢 Easy: Kaiming Initialization\n",
     "\n",
@@ -20,7 +20,7 @@
     "    # Returns the weight tensor\n",
     "```"
    ],
-   "outputs": []
+   "id": "cell-c3fda7896e25"
   },
   {
    "cell_type": "code",
@@ -34,7 +34,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-236761a91c27"
   },
   {
    "cell_type": "code",
@@ -44,7 +45,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-499b086c88b6"
   },
   {
    "cell_type": "code",
@@ -56,7 +58,8 @@
     "def kaiming_init(weight):\n",
     "    pass  # fill with normal(0, sqrt(2/fan_in))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d23438bb715e"
   },
   {
    "cell_type": "code",
@@ -70,7 +73,8 @@
     "print(f'Mean: {w.mean():.4f} (expect ~0)')\n",
     "print(f'Std:  {w.std():.4f} (expect {math.sqrt(2/512):.4f})')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-809b6e52b602"
   },
   {
    "cell_type": "code",
@@ -81,7 +85,8 @@
     "from torch_judge import check\n",
     "check('weight_init')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b277660b87af"
   }
  ],
  "metadata": {
@@ -96,5 +101,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/21_gradient_clipping.ipynb b/templates/21_gradient_clipping.ipynb
index 4f783f8..ba837d5 100644
--- a/templates/21_gradient_clipping.ipynb
+++ b/templates/21_gradient_clipping.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb)\n",
     "\n",
     "# 🟢 Easy: Gradient Norm Clipping\n",
     "\n",
@@ -22,7 +22,7 @@
     "2. If total > max_norm: scale all grads by `max_norm / total`\n",
     "3. Return original total norm"
    ],
-   "outputs": []
+   "id": "cell-e28caf6c3582"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b413d4367239"
   },
   {
    "cell_type": "code",
@@ -45,7 +46,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-21b2f0a310f7"
   },
   {
    "cell_type": "code",
@@ -57,7 +59,8 @@
     "def clip_grad_norm(parameters, max_norm):\n",
     "    pass  # compute total norm, clip if needed, return original norm"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6017d76191ec"
   },
   {
    "cell_type": "code",
@@ -72,7 +75,8 @@
     "print('After: ', p.grad.norm().item())\n",
     "print('Original norm:', orig)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8e06f47a1286"
   },
   {
    "cell_type": "code",
@@ -83,7 +87,8 @@
     "from torch_judge import check\n",
     "check('gradient_clipping')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6ce4d03f65b1"
   }
  ],
  "metadata": {
@@ -98,5 +103,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/22_conv2d.ipynb b/templates/22_conv2d.ipynb
index 9f5c0cd..11e062e 100644
--- a/templates/22_conv2d.ipynb
+++ b/templates/22_conv2d.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb)\n",
     "\n",
     "# 🟠 Medium: 2D Convolution\n",
     "\n",
@@ -22,7 +22,7 @@
     "- Support `stride` and `padding` parameters\n",
     "- `F.pad` for zero-padding is allowed"
    ],
-   "outputs": []
+   "id": "cell-5e18a8e38faa"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-7270ae5fe54f"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
     "import torch\n",
     "import torch.nn.functional as F"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-69c0ede1341a"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "def my_conv2d(x, weight, bias=None, stride=1, padding=0):\n",
     "    pass  # extract patches, apply kernel, handle stride/padding"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-7f0d7ebada60"
   },
   {
    "cell_type": "code",
@@ -71,7 +74,8 @@
     "print('Output:', my_conv2d(x, w).shape)\n",
     "print('Match:', torch.allclose(my_conv2d(x, w), F.conv2d(x, w), atol=1e-4))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e74cb3ef2145"
   },
   {
    "cell_type": "code",
@@ -82,7 +86,8 @@
     "from torch_judge import check\n",
     "check('conv2d')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d1448814fd6c"
   }
  ],
  "metadata": {
@@ -97,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/23_cross_attention.ipynb b/templates/23_cross_attention.ipynb
index 2467285..5a480fb 100644
--- a/templates/23_cross_attention.ipynb
+++ b/templates/23_cross_attention.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb)\n",
     "\n",
     "# 🟠 Medium: Multi-Head Cross-Attention\n",
     "\n",
@@ -23,7 +23,7 @@
     "- Q comes from the decoder, K and V come from the encoder\n",
     "- No causal mask (all encoder positions visible)"
    ],
-   "outputs": []
+   "id": "cell-34a817f5fad7"
   },
   {
    "cell_type": "code",
@@ -37,7 +37,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-435b5ae23b1b"
   },
   {
    "cell_type": "code",
@@ -48,7 +49,8 @@
     "import torch.nn as nn\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f994399f2c94"
   },
   {
    "cell_type": "code",
@@ -65,7 +67,8 @@
     "    def forward(self, x_q, x_kv):\n",
     "        pass  # Q from x_q, K/V from x_kv, no causal mask"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1f37d11c8a9d"
   },
   {
    "cell_type": "code",
@@ -78,7 +81,8 @@
     "x_kv = torch.randn(2, 10, 64)\n",
     "print('Output:', attn(x_q, x_kv).shape)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-64eafa287bd5"
   },
   {
    "cell_type": "code",
@@ -89,7 +93,8 @@
     "from torch_judge import check\n",
     "check('cross_attention')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1f9c3bf410d6"
   }
  ],
  "metadata": {
@@ -104,5 +109,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/24_rope.ipynb b/templates/24_rope.ipynb
index d4ffd7c..f624e51 100644
--- a/templates/24_rope.ipynb
+++ b/templates/24_rope.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb)\n",
     "\n",
     "# 🔴 Hard: Rotary Position Embedding (RoPE)\n",
     "\n",
@@ -24,7 +24,7 @@
     "```\n",
     "This makes `dot(q_rot[i], k_rot[j])` depend only on `i - j` (relative position)."
    ],
-   "outputs": []
+   "id": "cell-da2a3a074d20"
   },
   {
    "cell_type": "code",
@@ -38,7 +38,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-84b6ccc614aa"
   },
   {
    "cell_type": "code",
@@ -48,7 +49,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5e4a7489f78a"
   },
   {
    "cell_type": "code",
@@ -63,7 +65,8 @@
     "    # 3. Apply rotation\n",
     "    pass"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4a15485a11b8"
   },
   {
    "cell_type": "code",
@@ -77,7 +80,8 @@
     "print('Shape preserved:', qr.shape == q.shape)\n",
     "print('Norm preserved:', torch.allclose(q.norm(dim=-1), qr.norm(dim=-1), atol=1e-4))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bc2629a4ce01"
   },
   {
    "cell_type": "code",
@@ -88,7 +92,8 @@
     "from torch_judge import check\n",
     "check('rope')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c030cc8cf1ed"
   }
  ],
  "metadata": {
@@ -103,5 +108,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/25_flash_attention.ipynb b/templates/25_flash_attention.ipynb
index 249cd2a..304023f 100644
--- a/templates/25_flash_attention.ipynb
+++ b/templates/25_flash_attention.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb)\n",
     "\n",
     "# 🔴 Hard: Flash Attention (Tiled)\n",
     "\n",
@@ -26,7 +26,7 @@
     "\n",
     "Must give **identical** results to standard softmax attention."
    ],
-   "outputs": []
+   "id": "cell-a3a2f1752b33"
   },
   {
    "cell_type": "code",
@@ -40,7 +40,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-41f68149b64d"
   },
   {
    "cell_type": "code",
@@ -50,7 +51,8 @@
     "import torch\n",
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2682ca1fa6cb"
   },
   {
    "cell_type": "code",
@@ -63,7 +65,8 @@
     "    # Process Q in blocks, iterate K/V blocks with online softmax\n",
     "    pass"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3fbdae26e27b"
   },
   {
    "cell_type": "code",
@@ -78,7 +81,8 @@
     "ref = torch.bmm(torch.softmax(scores, dim=-1), V)\n",
     "print('Match:', torch.allclose(out, ref, atol=1e-4))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-39451abb2fb9"
   },
   {
    "cell_type": "code",
@@ -89,7 +93,8 @@
     "from torch_judge import check\n",
     "check('flash_attention')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6b54c01cbe94"
   }
  ],
  "metadata": {
@@ -104,5 +109,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/26_lora.ipynb b/templates/26_lora.ipynb
index eaf7f69..a011bbd 100644
--- a/templates/26_lora.ipynb
+++ b/templates/26_lora.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb)\n",
     "\n",
     "# 🟠 Medium: LoRA (Low-Rank Adaptation)\n",
     "\n",
@@ -25,7 +25,7 @@
     "- `self.lora_B`: `nn.Parameter(out_features, rank)` — **zero** init\n",
     "- Scaling: `alpha / rank`"
    ],
-   "outputs": []
+   "id": "cell-4f3ac30b4879"
   },
   {
    "cell_type": "code",
@@ -39,7 +39,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-804235218069"
   },
   {
    "cell_type": "code",
@@ -49,7 +50,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-738ad84143a1"
   },
   {
    "cell_type": "code",
@@ -66,7 +68,8 @@
     "    def forward(self, x):\n",
     "        pass  # base + lora"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-7d1bc24164ef"
   },
   {
    "cell_type": "code",
@@ -80,7 +83,8 @@
     "print('Trainable:', sum(p.numel() for p in layer.parameters() if p.requires_grad))\n",
     "print('Total:    ', sum(p.numel() for p in layer.parameters()))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d02803ceb58b"
   },
   {
    "cell_type": "code",
@@ -91,7 +95,8 @@
     "from torch_judge import check\n",
     "check('lora')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5d161facb5df"
   }
  ],
  "metadata": {
@@ -106,5 +111,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/27_vit_patch.ipynb b/templates/27_vit_patch.ipynb
index 17acd59..9c15d46 100644
--- a/templates/27_vit_patch.ipynb
+++ b/templates/27_vit_patch.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb)\n",
     "\n",
     "# 🟠 Medium: ViT Patch Embedding\n",
     "\n",
@@ -24,7 +24,7 @@
     "2. Project each patch: `nn.Linear(C*P*P, embed_dim)`\n",
     "3. `num_patches = (img_size // patch_size) ** 2`"
    ],
-   "outputs": []
+   "id": "cell-e3dac63f0643"
   },
   {
    "cell_type": "code",
@@ -38,7 +38,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-740f83775300"
   },
   {
    "cell_type": "code",
@@ -48,7 +49,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d41a3c3a21ea"
   },
   {
    "cell_type": "code",
@@ -65,7 +67,8 @@
     "    def forward(self, x):\n",
     "        pass  # reshape to patches, project"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-dc7725416d25"
   },
   {
    "cell_type": "code",
@@ -78,7 +81,8 @@
     "print('Output:', pe(x).shape)\n",
     "print('Patches:', pe.num_patches)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2413ef692804"
   },
   {
    "cell_type": "code",
@@ -89,7 +93,8 @@
     "from torch_judge import check\n",
     "check('vit_patch')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-19a6048ad032"
   }
  ],
  "metadata": {
@@ -104,5 +109,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/28_moe.ipynb b/templates/28_moe.ipynb
index f50fe33..1b8ef16 100644
--- a/templates/28_moe.ipynb
+++ b/templates/28_moe.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb)\n",
     "\n",
     "# 🔴 Hard: Mixture of Experts (MoE)\n",
     "\n",
@@ -23,7 +23,7 @@
     "- `self.experts`: `nn.ModuleList` of MLPs `(Linear→ReLU→Linear)`\n",
     "- For each token: select top-k experts, compute weighted sum of their outputs"
    ],
-   "outputs": []
+   "id": "cell-9c304a8c3956"
   },
   {
    "cell_type": "code",
@@ -37,7 +37,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-20ad10e1e33a"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-34b93b3abcdd"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "    def forward(self, x):\n",
     "        pass  # route tokens to top-k experts"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f436bda3de57"
   },
   {
    "cell_type": "code",
@@ -77,7 +80,8 @@
     "print('Output:', moe(x).shape)\n",
     "print('Params:', sum(p.numel() for p in moe.parameters()))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-561861c5a056"
   },
   {
    "cell_type": "code",
@@ -88,7 +92,8 @@
     "from torch_judge import check\n",
     "check('moe')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-51890761b776"
   }
  ],
  "metadata": {
@@ -103,5 +108,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/29_adam.ipynb b/templates/29_adam.ipynb
index a3c1d63..c6316df 100644
--- a/templates/29_adam.ipynb
+++ b/templates/29_adam.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb)\n",
     "\n",
     "# 🟠 Medium: Adam Optimizer\n",
     "\n",
@@ -27,7 +27,7 @@
     "p -= lr * m̂ / (√v̂ + ε)\n",
     "```"
    ],
-   "outputs": []
+   "id": "cell-5cb31982aaf8"
   },
   {
    "cell_type": "code",
@@ -41,7 +41,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1cb4c371e8f7"
   },
   {
    "cell_type": "code",
@@ -50,7 +51,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3a93c9989281"
   },
   {
    "cell_type": "code",
@@ -69,7 +71,8 @@
     "    def zero_grad(self):\n",
     "        pass  # zero all gradients"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a236feaf1380"
   },
   {
    "cell_type": "code",
@@ -87,7 +90,8 @@
     "    opt.zero_grad()\n",
     "    print(f'Step {i}: loss={loss.item():.4f}')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b86f6e0429ec"
   },
   {
    "cell_type": "code",
@@ -98,7 +102,8 @@
     "from torch_judge import check\n",
     "check('adam')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-64d025317dea"
   }
  ],
  "metadata": {
@@ -113,5 +118,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/30_cosine_lr.ipynb b/templates/30_cosine_lr.ipynb
index 108f579..d6ae82b 100644
--- a/templates/30_cosine_lr.ipynb
+++ b/templates/30_cosine_lr.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb)\n",
     "\n",
     "# 🟠 Medium: Cosine LR Scheduler with Warmup\n",
     "\n",
@@ -22,7 +22,7 @@
     "```\n",
     "where `progress = (step - warmup) / (total - warmup)`"
    ],
-   "outputs": []
+   "id": "cell-2f7ab12b09f5"
   },
   {
    "cell_type": "code",
@@ -36,7 +36,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-315c66c2530d"
   },
   {
    "cell_type": "code",
@@ -45,7 +46,8 @@
    "source": [
     "import math"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b65689467914"
   },
   {
    "cell_type": "code",
@@ -57,7 +59,8 @@
     "def cosine_lr_schedule(step, total_steps, warmup_steps, max_lr, min_lr=0.0):\n",
     "    pass  # warmup then cosine decay"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-11a186b646b6"
   },
   {
    "cell_type": "code",
@@ -71,7 +74,8 @@
     "print(f'Mid: {lrs[55]:.6f}')\n",
     "print(f'End: {lrs[100]:.6f}')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-ad10b7e52e80"
   },
   {
    "cell_type": "code",
@@ -82,7 +86,8 @@
     "from torch_judge import check\n",
     "check('cosine_lr')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-5c0c6e53cf62"
   }
  ],
  "metadata": {
@@ -97,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/31_gradient_accumulation.ipynb b/templates/31_gradient_accumulation.ipynb
index 78e205e..dfc4156 100644
--- a/templates/31_gradient_accumulation.ipynb
+++ b/templates/31_gradient_accumulation.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb)\n",
     "\n",
     "# 🟢 Easy: Gradient Accumulation\n",
     "\n",
@@ -25,7 +25,7 @@
     "\n",
     "The key insight: dividing each loss by `n` before backward makes accumulated gradients equal to a single large-batch gradient."
    ],
-   "outputs": []
+   "id": "cell-729ba7c32f56"
   },
   {
    "cell_type": "code",
@@ -39,7 +39,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e5d4e04e13dd"
   },
   {
    "cell_type": "code",
@@ -49,7 +50,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1782016823d3"
   },
   {
    "cell_type": "code",
@@ -61,7 +63,8 @@
     "def accumulated_step(model, optimizer, loss_fn, micro_batches):\n",
     "    pass  # zero_grad, loop (forward, scale loss, backward), step"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-252c5a1ae8bd"
   },
   {
    "cell_type": "code",
@@ -75,7 +78,8 @@
     "    [(torch.randn(2, 4), torch.randn(2, 2)) for _ in range(4)])\n",
     "print('Loss:', loss)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c342b0f437a0"
   },
   {
    "cell_type": "code",
@@ -86,7 +90,8 @@
     "from torch_judge import check\n",
     "check('gradient_accumulation')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fb30a026811c"
   }
  ],
  "metadata": {
@@ -101,5 +106,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/32_topk_sampling.ipynb b/templates/32_topk_sampling.ipynb
index bbb788e..04681c4 100644
--- a/templates/32_topk_sampling.ipynb
+++ b/templates/32_topk_sampling.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb)\n",
     "\n",
     "# 🟠 Medium: Top-k / Top-p (Nucleus) Sampling\n",
     "\n",
@@ -23,7 +23,7 @@
     "3. Top-p: sort by prob, mask tokens where cumulative prob exceeds p\n",
     "4. Sample from filtered distribution"
    ],
-   "outputs": []
+   "id": "cell-232eac9049c8"
   },
   {
    "cell_type": "code",
@@ -37,7 +37,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2bbecf087b2d"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-a29f7d64a079"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "def sample_top_k_top_p(logits, top_k=0, top_p=1.0, temperature=1.0):\n",
     "    pass  # temperature, top-k filter, top-p filter, sample"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-0b28e6279efe"
   },
   {
    "cell_type": "code",
@@ -71,7 +74,8 @@
     "print('top_p=0.5:', sample_top_k_top_p(logits.clone(), top_p=0.5))\n",
     "print('temp=0.01:', sample_top_k_top_p(logits.clone(), temperature=0.01))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fe6e6bdda90f"
   },
   {
    "cell_type": "code",
@@ -82,7 +86,8 @@
     "from torch_judge import check\n",
     "check('topk_sampling')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-d06147650a42"
   }
  ],
  "metadata": {
@@ -97,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/33_beam_search.ipynb b/templates/33_beam_search.ipynb
index 7735f71..f708333 100644
--- a/templates/33_beam_search.ipynb
+++ b/templates/33_beam_search.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb)\n",
     "\n",
     "# 🟠 Medium: Beam Search Decoding\n",
     "\n",
@@ -23,7 +23,7 @@
     "3. Keep top `beam_width` beams by total log-probability\n",
     "4. Stop when best beam ends with `eos_token` or `max_len` reached"
    ],
-   "outputs": []
+   "id": "cell-a1ce1a0ad1e8"
   },
   {
    "cell_type": "code",
@@ -37,7 +37,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-09943e13a512"
   },
   {
    "cell_type": "code",
@@ -46,7 +47,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-76094db43ca6"
   },
   {
    "cell_type": "code",
@@ -58,7 +60,8 @@
     "def beam_search(log_prob_fn, start_token, max_len, beam_width, eos_token):\n",
     "    pass  # maintain beams, expand, prune, return best"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-fa2f812569bc"
   },
   {
    "cell_type": "code",
@@ -73,7 +76,8 @@
     "seq = beam_search(simple_fn, start_token=0, max_len=5, beam_width=2, eos_token=4)\n",
     "print('Sequence:', seq)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c95ad95c3613"
   },
   {
    "cell_type": "code",
@@ -84,7 +88,8 @@
     "from torch_judge import check\n",
     "check('beam_search')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2d48c63dbaf6"
   }
  ],
  "metadata": {
@@ -99,5 +104,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/34_speculative_decoding.ipynb b/templates/34_speculative_decoding.ipynb
index 5b8e692..d17e494 100644
--- a/templates/34_speculative_decoding.ipynb
+++ b/templates/34_speculative_decoding.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb)\n",
     "\n",
     "# 🔴 Hard: Speculative Decoding\n",
     "\n",
@@ -25,7 +25,7 @@
     "2. Accept with probability `min(1, ratio)`\n",
     "3. If rejected: sample from `normalize(max(0, target - draft))`, append, and stop"
    ],
-   "outputs": []
+   "id": "cell-36f582c1e352"
   },
   {
    "cell_type": "code",
@@ -39,7 +39,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-24210e4e8b04"
   },
   {
    "cell_type": "code",
@@ -48,7 +49,8 @@
    "source": [
     "import torch"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-619e8bf022fe"
   },
   {
    "cell_type": "code",
@@ -60,7 +62,8 @@
     "def speculative_decode(target_probs, draft_probs, draft_tokens):\n",
     "    pass  # accept/reject loop"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-1af7a28c2e7a"
   },
   {
    "cell_type": "code",
@@ -76,7 +79,8 @@
     "draft = torch.softmax(torch.randn(4, 10), dim=-1)\n",
     "print('Random draft:', speculative_decode(target, draft, tokens))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-4881fb28d742"
   },
   {
    "cell_type": "code",
@@ -87,7 +91,8 @@
     "from torch_judge import check\n",
     "check('speculative_decoding')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-762b854c38a3"
   }
  ],
  "metadata": {
@@ -102,5 +107,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/35_bpe.ipynb b/templates/35_bpe.ipynb
index 9adfca0..8233175 100644
--- a/templates/35_bpe.ipynb
+++ b/templates/35_bpe.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb)\n",
     "\n",
     "# 🔴 Hard: Byte-Pair Encoding (BPE)\n",
     "\n",
@@ -24,7 +24,7 @@
     "3. Merge the most frequent pair into a single token\n",
     "4. Repeat for `num_merges` iterations"
    ],
-   "outputs": []
+   "id": "cell-7c7bccea580e"
   },
   {
    "cell_type": "code",
@@ -38,7 +38,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b0071d4dba45"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
    "source": [
     "# No imports needed"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-403c83cef060"
   },
   {
    "cell_type": "code",
@@ -66,7 +68,8 @@
     "    def encode(self, text):\n",
     "        pass  # apply learned merges to split text"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6c505da33a0d"
   },
   {
    "cell_type": "code",
@@ -79,7 +82,8 @@
     "print('Merges:', bpe.merges[:5])\n",
     "print('Encode:', bpe.encode('low lower'))"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-df894fa7e08e"
   },
   {
    "cell_type": "code",
@@ -90,7 +94,8 @@
     "from torch_judge import check\n",
     "check('bpe')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-c1eabc45d192"
   }
  ],
  "metadata": {
@@ -105,5 +110,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/36_int8_quantization.ipynb b/templates/36_int8_quantization.ipynb
index 0c3516a..c35baed 100644
--- a/templates/36_int8_quantization.ipynb
+++ b/templates/36_int8_quantization.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb)\n",
     "\n",
     "# 🔴 Hard: INT8 Quantized Linear\n",
     "\n",
@@ -23,7 +23,7 @@
     "3. Store as `register_buffer` (not trainable)\n",
     "4. Forward: dequantize (`int8.float() * scale`) then matmul"
    ],
-   "outputs": []
+   "id": "cell-7db8d2e6ad86"
   },
   {
    "cell_type": "code",
@@ -37,7 +37,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-bad6a1fafbe1"
   },
   {
    "cell_type": "code",
@@ -47,7 +48,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-8535b80f5f3f"
   },
   {
    "cell_type": "code",
@@ -64,7 +66,8 @@
     "    def forward(self, x):\n",
     "        pass  # dequantize and matmul"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-40724188a196"
   },
   {
    "cell_type": "code",
@@ -79,7 +82,8 @@
     "print('dtype:', q.weight_int8.dtype)\n",
     "print('Max quant error:', (w - q.weight_int8.float() * q.scale).abs().max().item())"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-110ae54125bd"
   },
   {
    "cell_type": "code",
@@ -90,7 +94,8 @@
     "from torch_judge import check\n",
     "check('int8_quantization')"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f0d8000cd49c"
   }
  ],
  "metadata": {
@@ -105,5 +110,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/37_dpo_loss.ipynb b/templates/37_dpo_loss.ipynb
index 38ea5b5..87d734d 100644
--- a/templates/37_dpo_loss.ipynb
+++ b/templates/37_dpo_loss.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb)\n",
     "\n",
     "# 🔴 Hard: DPO Loss\n",
     "\n",
@@ -19,7 +19,8 @@
     "    # All inputs: (B,) log-probabilities\n",
     "    # Returns: scalar loss\n",
     "```"
-   ]
+   ],
+   "id": "cell-5f1148d811f0"
   },
   {
    "cell_type": "code",
@@ -33,7 +34,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-50824f7a9545"
   },
   {
    "cell_type": "code",
@@ -43,7 +45,8 @@
    "source": [
     "import torch\n",
     "import torch.nn.functional as F"
-   ]
+   ],
+   "id": "cell-9e2999e4f730"
   },
   {
    "cell_type": "code",
@@ -56,7 +59,8 @@
     "def dpo_loss(policy_chosen_logps, policy_rejected_logps,\n",
     "             ref_chosen_logps, ref_rejected_logps, beta=0.1):\n",
     "    pass  # -log(sigmoid(beta * (chosen_reward - rejected_reward)))"
-   ]
+   ],
+   "id": "cell-6bb23f54bf17"
   },
   {
    "cell_type": "code",
@@ -70,7 +74,8 @@
     "ref_c = torch.tensor([-1.0, -1.0])\n",
     "ref_r = torch.tensor([-1.0, -1.0])\n",
     "print('Loss:', dpo_loss(chosen, rejected, ref_c, ref_r, beta=0.1).item())"
-   ]
+   ],
+   "id": "cell-a4667c46e10b"
   },
   {
    "cell_type": "code",
@@ -81,7 +86,8 @@
     "# ✅ SUBMIT\n",
     "from torch_judge import check\n",
     "check('dpo_loss')"
-   ]
+   ],
+   "id": "cell-1c162f8718df"
   }
  ],
  "metadata": {
@@ -96,5 +102,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/templates/38_grpo_loss.ipynb b/templates/38_grpo_loss.ipynb
index 546f212..5e64339 100644
--- a/templates/38_grpo_loss.ipynb
+++ b/templates/38_grpo_loss.ipynb
@@ -5,7 +5,7 @@
    "id": "968cc37c",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb)\n",
     "\n",
     "# 🔴 Hard: GRPO Loss\n",
     "\n",
@@ -49,7 +49,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-763451936f66"
   },
   {
    "cell_type": "code",
diff --git a/templates/39_ppo_loss.ipynb b/templates/39_ppo_loss.ipynb
index 7958571..21d9907 100644
--- a/templates/39_ppo_loss.ipynb
+++ b/templates/39_ppo_loss.ipynb
@@ -5,7 +5,7 @@
    "id": "6674fa96",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb)\n",
     "\n",
     "# 🔴 Hard: PPO Clipped Loss\n",
     "\n",
@@ -54,7 +54,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-e455b0c64ef7"
   },
   {
    "cell_type": "code",
@@ -65,7 +66,8 @@
     "import torch\n",
     "import torch.nn.functional as F\n",
     "from torch import Tensor\n"
-   ]
+   ],
+   "id": "cell-9a423ce072a5"
   },
   {
    "cell_type": "code",
@@ -78,7 +80,8 @@
     "def ppo_loss(new_logps: Tensor, old_logps: Tensor, advantages: Tensor,\n",
     "             clip_ratio: float = 0.2) -> Tensor:\n",
     "    pass  # -mean(min(r * adv, clamp(r, 1-clip, 1+clip) * adv)) with gradients only through new_logps\n"
-   ]
+   ],
+   "id": "cell-78651a5bf0cc"
   },
   {
    "cell_type": "code",
@@ -91,7 +94,8 @@
     "old_logps = torch.tensor([0.0, -0.1, -0.5, -0.5])\n",
     "advantages = torch.tensor([1.0, -1.0, 0.5, -0.5])\n",
     "print('Loss:', ppo_loss(new_logps, old_logps, advantages, clip_ratio=0.2))\n"
-   ]
+   ],
+   "id": "cell-06a29e93ae34"
   },
   {
    "cell_type": "code",
@@ -102,7 +106,8 @@
     "# ✅ SUBMIT\n",
     "from torch_judge import check\n",
     "check('ppo_loss')\n"
-   ]
+   ],
+   "id": "cell-837482c2b65a"
   }
  ],
  "metadata": {
diff --git a/templates/40_linear_regression.ipynb b/templates/40_linear_regression.ipynb
index 1d099e7..24da9dd 100644
--- a/templates/40_linear_regression.ipynb
+++ b/templates/40_linear_regression.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb)\n",
+    "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb)\n",
     "\n",
     "# 🟡 Medium: Linear Regression\n",
     "\n",
@@ -52,7 +52,8 @@
     "- `closed_form` must not use iterative optimization\n",
     "- `gradient_descent` must manually compute gradients (no `autograd`)\n",
     "- `nn_linear` should use `torch.nn.Linear` and `loss.backward()`"
-   ]
+   ],
+   "id": "cell-d1745b1876b4"
   },
   {
    "cell_type": "code",
@@ -66,7 +67,8 @@
     "    pass\n"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-3be7852a281e"
   },
   {
    "cell_type": "code",
@@ -76,7 +78,8 @@
     "import torch\n",
     "import torch.nn as nn"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-6a500c47f5a9"
   },
   {
    "cell_type": "code",
@@ -100,7 +103,8 @@
     "        \"\"\"Train nn.Linear with autograd\"\"\"\n",
     "        pass  # Return (w, b)"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-b1ff9cc6ea6b"
   },
   {
    "cell_type": "code",
@@ -126,7 +130,8 @@
     "\n",
     "print(f\"\\nTrue:         w={true_w}, b=3.0\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-f756deb6e805"
   },
   {
    "cell_type": "code",
@@ -137,7 +142,8 @@
     "from torch_judge import check\n",
     "check(\"linear_regression\")"
    ],
-   "execution_count": null
+   "execution_count": null,
+   "id": "cell-2a5bddbe8d48"
   }
  ],
  "metadata": {
@@ -152,5 +158,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }