diff --git a/.github/workflows/notebook-validation.yml b/.github/workflows/notebook-validation.yml new file mode 100644 index 0000000..0674575 --- /dev/null +++ b/.github/workflows/notebook-validation.yml @@ -0,0 +1,37 @@ +name: Validate notebooks + +on: + pull_request: + branches: [master] + paths: + - "templates/**/*.ipynb" + - "solutions/**/*.ipynb" + - "scripts/validate_notebooks.py" + - ".github/workflows/notebook-validation.yml" + push: + branches: [master] + paths: + - "templates/**/*.ipynb" + - "solutions/**/*.ipynb" + - "scripts/validate_notebooks.py" + - ".github/workflows/notebook-validation.yml" + workflow_dispatch: + +jobs: + validate-notebooks: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install nbformat + run: pip install nbformat + + - name: Validate notebooks + run: python scripts/validate_notebooks.py diff --git a/scripts/add_colab_badges.py b/scripts/add_colab_badges.py index e6f0c1a..6289ddb 100644 --- a/scripts/add_colab_badges.py +++ b/scripts/add_colab_badges.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Add 'Open in Colab' badges to all template and solution notebooks.""" +"""Add 'Open in Colab' links to all template and solution notebooks.""" import json from pathlib import Path @@ -9,7 +9,6 @@ ROOT = Path(__file__).resolve().parent.parent TEMPLATES_DIR = ROOT / "templates" SOLUTIONS_DIR = ROOT / "solutions" -BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg" def colab_url(filename: str, folder: str) -> str: @@ -19,8 +18,8 @@ def colab_url(filename: str, folder: str) -> str: ) -def badge_markdown(filename: str, folder: str) -> str: - return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename, folder)})" +def colab_markdown(filename: str, folder: str) -> str: + return f"[Open in Colab]({colab_url(filename, folder)})" def process_notebook(path: Path, folder: str) -> bool: @@ -33,11 +32,11 @@ def process_notebook(path: Path, folder: str) -> bool: source_lines = cells[0]["source"] flat = "".join(source_lines) if isinstance(source_lines, list) else source_lines - if "colab-badge.svg" in flat: + if "colab.research.google.com/github/" in flat: return False - badge = badge_markdown(path.name, folder) - cells[0]["source"] = [badge + "\n\n"] + ( + link = colab_markdown(path.name, folder) + cells[0]["source"] = [link + "\n\n"] + ( source_lines if isinstance(source_lines, list) else [source_lines] ) diff --git a/scripts/validate_notebooks.py b/scripts/validate_notebooks.py new file mode 100644 index 0000000..1ae3954 --- /dev/null +++ b/scripts/validate_notebooks.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +"""Validate and optionally repair notebook cell schemas.""" + +from __future__ import annotations + +import argparse +import hashlib +import json +from pathlib import Path +import re +from typing import Any + + +ROOT = Path(__file__).resolve().parent.parent +NOTEBOOK_GLOBS = ("templates/*.ipynb", "solutions/*.ipynb") +CODE_ONLY_FIELDS = ("outputs", "execution_count") +CELL_ID_RE = re.compile(r"^[A-Za-z0-9-_]+$") + + +def notebook_paths() -> list[Path]: + paths: list[Path] = [] + for pattern in NOTEBOOK_GLOBS: + paths.extend(ROOT.glob(pattern)) + return sorted(paths) + + +def load_notebook(path: Path) -> dict[str, Any]: + with path.open("r", encoding="utf-8") as f: + return json.load(f) + + +def write_notebook(path: Path, notebook: dict[str, Any]) -> None: + with path.open("w", encoding="utf-8") as f: + json.dump(notebook, f, ensure_ascii=False, indent=1) + f.write("\n") + + +def source_text(cell: dict[str, Any]) -> str: + source = cell.get("source", "") + if isinstance(source, list): + return "".join(str(line) for line in source) + return str(source) + + +def stable_cell_id(path: Path, index: int, cell: dict[str, Any], used: set[str]) -> str: + seed = ( + f"{path.relative_to(ROOT)}:{index}:" + f"{cell.get('cell_type', '')}:{source_text(cell)}" + ) + base = f"cell-{hashlib.sha1(seed.encode('utf-8')).hexdigest()[:12]}" + cell_id = base + suffix = 1 + while cell_id in used: + cell_id = f"{base}-{suffix}" + suffix += 1 + return cell_id + + +def sanitize_notebook(path: Path, fix: bool) -> list[str]: + notebook = load_notebook(path) + errors: list[str] = [] + changed = False + used_ids: set[str] = set() + needs_cell_ids = False + nbformat_minor = int(notebook.get("nbformat_minor", 0)) + version_error_added = False + + for index, cell in enumerate(notebook.get("cells", [])): + cell_id = cell.get("id") + if not isinstance(cell_id, str) or not cell_id: + errors.append(f"{path.relative_to(ROOT)} cell {index}: missing cell id") + needs_cell_ids = True + if fix: + cell["id"] = stable_cell_id(path, index, cell, used_ids) + cell_id = cell["id"] + changed = True + elif not CELL_ID_RE.match(cell_id): + errors.append(f"{path.relative_to(ROOT)} cell {index}: invalid cell id") + needs_cell_ids = True + if fix: + cell["id"] = stable_cell_id(path, index, cell, used_ids) + cell_id = cell["id"] + changed = True + + if isinstance(cell_id, str): + if cell_id in used_ids: + errors.append(f"{path.relative_to(ROOT)} cell {index}: duplicate cell id") + if fix: + cell["id"] = stable_cell_id(path, index, cell, used_ids) + cell_id = cell["id"] + changed = True + used_ids.add(cell_id) + + if ( + isinstance(cell_id, str) + and cell_id + and nbformat_minor < 5 + and not version_error_added + ): + errors.append( + f"{path.relative_to(ROOT)}: cell ids require nbformat_minor >= 5" + ) + version_error_added = True + if fix: + needs_cell_ids = True + changed = True + + if cell.get("cell_type") == "code": + continue + + for field in CODE_ONLY_FIELDS: + if field in cell: + errors.append( + f"{path.relative_to(ROOT)} cell {index}: " + f"non-code cell contains '{field}'" + ) + if fix: + del cell[field] + changed = True + + if changed: + if needs_cell_ids: + notebook["nbformat"] = 4 + notebook["nbformat_minor"] = max(int(notebook.get("nbformat_minor", 0)), 5) + write_notebook(path, notebook) + + return errors + + +def validate_with_nbformat(path: Path) -> str | None: + try: + import nbformat + except ImportError: + return None + + try: + notebook = nbformat.read(path, as_version=4) + nbformat.validate(notebook) + except Exception as exc: # pragma: no cover - message is for CLI output + return f"{path.relative_to(ROOT)}: {exc}" + + return None + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument( + "--fix", + action="store_true", + help="repair missing/invalid ids and remove code-only fields from non-code cells", + ) + args = parser.parse_args() + + schema_errors: list[str] = [] + for path in notebook_paths(): + schema_errors.extend(sanitize_notebook(path, args.fix)) + + if schema_errors and not args.fix: + print("Notebook schema errors:") + print("\n".join(schema_errors)) + print("\nRun scripts/validate_notebooks.py --fix to repair them.") + return 1 + + nbformat_errors: list[str] = [] + for path in notebook_paths(): + error = validate_with_nbformat(path) + if error: + nbformat_errors.append(error) + + if nbformat_errors: + print("nbformat validation errors:") + print("\n".join(nbformat_errors)) + return 1 + + if schema_errors: + print(f"Fixed {len(schema_errors)} notebook schema issue(s).") + else: + print("All notebooks passed validation.") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/solutions/01_relu_solution.ipynb b/solutions/01_relu_solution.ipynb index 085e3ab..9f95fe9 100644 --- a/solutions/01_relu_solution.ipynb +++ b/solutions/01_relu_solution.ipynb @@ -5,7 +5,7 @@ "id": "0556419b", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n", "\n", "# 🟒 Solution: Implement ReLU\n", "\n", @@ -26,7 +26,8 @@ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n", "except ImportError:\n", " pass\n" - ] + ], + "id": "cell-8b01cc47c4eb" }, { "cell_type": "code", @@ -35,7 +36,8 @@ "outputs": [], "source": [ "import torch" - ] + ], + "id": "cell-42aacb4e5964" }, { "cell_type": "code", @@ -47,7 +49,8 @@ "\n", "def relu(x: torch.Tensor) -> torch.Tensor:\n", " return x * (x > 0).float()" - ] + ], + "id": "cell-77d0ad2d5301" }, { "cell_type": "code", @@ -59,7 +62,8 @@ "x = torch.tensor([-2., -1., 0., 1., 2.])\n", "print(\"Input: \", x)\n", "print(\"Output:\", relu(x))" - ] + ], + "id": "cell-ee0dd6b7c97c" }, { "cell_type": "code", @@ -70,7 +74,8 @@ "# Run judge\n", "from torch_judge import check\n", "check(\"relu\")" - ] + ], + "id": "cell-a93501c6f94f" } ], "metadata": { diff --git a/solutions/02_softmax_solution.ipynb b/solutions/02_softmax_solution.ipynb index 902106e..c6a451d 100644 --- a/solutions/02_softmax_solution.ipynb +++ b/solutions/02_softmax_solution.ipynb @@ -17,14 +17,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n", "# 🟒 Solution: Implement Softmax\n", "\n", "Reference solution for the numerically-stable Softmax function.\n", "\n", "$$\\text{softmax}(x_i) = \\frac{e^{x_i - \\max(x)}}{\\sum_j e^{x_j - \\max(x)}}$$" ], - "outputs": [] + "id": "cell-1b2118d19858" }, { "cell_type": "code", @@ -38,7 +38,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-23f1a6c527b4" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "import torch" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-01a8eba71de9" }, { "cell_type": "code", @@ -61,7 +63,8 @@ " return e_x / e_x.sum(dim=dim, keepdim=True)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-455503eacc0f" }, { "cell_type": "code", @@ -74,7 +77,8 @@ "print(\"Ref: \", torch.softmax(x, dim=-1))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-10c0561ea637" }, { "cell_type": "code", @@ -85,7 +89,8 @@ "check(\"softmax\")" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ff8e53cdd120" } ] } diff --git a/solutions/03_linear_solution.ipynb b/solutions/03_linear_solution.ipynb index 4f25a88..7ebd49c 100644 --- a/solutions/03_linear_solution.ipynb +++ b/solutions/03_linear_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n", "# 🟑 Solution: Simple Linear Layer\n", "\n", "Reference solution for a fully-connected linear layer: **y = xW^T + b**" ], - "outputs": [] + "id": "cell-292929ae7f3e" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-bde97d16cfd7" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-4bdd71596832" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " return x @ self.weight.T + self.bias" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-8f2221e4cfc8" }, { "cell_type": "code", @@ -78,7 +81,8 @@ "print(\"Output shape:\", layer.forward(x).shape)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-bb7edb8373a5" }, { "cell_type": "code", @@ -89,7 +93,8 @@ "check(\"linear\")" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-9ee6e790f9c4" } ] } diff --git a/solutions/04_layernorm_solution.ipynb b/solutions/04_layernorm_solution.ipynb index 59ac87c..9b2c501 100644 --- a/solutions/04_layernorm_solution.ipynb +++ b/solutions/04_layernorm_solution.ipynb @@ -17,14 +17,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n", "# 🟑 Solution: Implement LayerNorm\n", "\n", "Reference solution for Layer Normalization.\n", "\n", "$$\\text{LayerNorm}(x) = \\gamma \\cdot \\frac{x - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} + \\beta$$" ], - "outputs": [] + "id": "cell-cc810d3dd609" }, { "cell_type": "code", @@ -38,7 +38,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-380c8f4b79b5" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "import torch" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a8430847acb3" }, { "cell_type": "code", @@ -62,7 +64,8 @@ " return gamma * x_norm + beta" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-592a4ee9f6b8" }, { "cell_type": "code", @@ -77,7 +80,8 @@ "print(\"Match ref?\", torch.allclose(out, ref, atol=1e-4))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-4bcbd3a0190d" }, { "cell_type": "code", @@ -88,7 +92,8 @@ "check(\"layernorm\")" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-69b499f00af4" } ] } diff --git a/solutions/05_attention_solution.ipynb b/solutions/05_attention_solution.ipynb index e82f45f..e2b5b10 100644 --- a/solutions/05_attention_solution.ipynb +++ b/solutions/05_attention_solution.ipynb @@ -5,7 +5,7 @@ "id": "5f63d076", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n", "# πŸ”΄ Solution: Softmax Attention\n", "\n", "Reference solution for the core Transformer attention mechanism.\n", @@ -36,7 +36,8 @@ "source": [ "import torch\n", "import math" - ] + ], + "id": "cell-caa94740fb9a" }, { "cell_type": "code", @@ -76,7 +77,8 @@ "V2 = torch.randn(1, 5, 32)\n", "out2 = scaled_dot_product_attention(Q2, K2, V2)\n", "print(\"Cross-attention shape:\", out2.shape, \"(expected: 1, 3, 32)\")" - ] + ], + "id": "cell-076c9d2a43e9" }, { "cell_type": "code", @@ -87,7 +89,8 @@ "# Run judge\n", "from torch_judge import check\n", "check(\"attention\")" - ] + ], + "id": "cell-490c36a20e6e" } ], "metadata": { diff --git a/solutions/06_multihead_attention_solution.ipynb b/solutions/06_multihead_attention_solution.ipynb index 7ed4ad4..8ec2b73 100644 --- a/solutions/06_multihead_attention_solution.ipynb +++ b/solutions/06_multihead_attention_solution.ipynb @@ -4,13 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n", "# πŸ”΄ Solution: Multi-Head Attention\n", "\n", "Reference solution for the Multi-Head Attention mechanism.\n", "\n", "$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$" - ] + ], + "id": "cell-028370b72535" }, { "cell_type": "code", @@ -24,7 +25,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-908cc63a33f7" }, { "cell_type": "code", @@ -35,7 +37,8 @@ "import torch\n", "import torch.nn as nn\n", "import math" - ] + ], + "id": "cell-8ece6c477a3f" }, { "cell_type": "code", @@ -90,7 +93,8 @@ "V = torch.randn(1, 7, 32)\n", "out2 = mha.forward(Q, K, V)\n", "print(\"Cross-attn shape:\", out2.shape)" - ] + ], + "id": "cell-2a0617b81206" }, { "cell_type": "code", @@ -101,7 +105,8 @@ "# Run judge\n", "from torch_judge import check\n", "check(\"mha\")" - ] + ], + "id": "cell-0003f3ca234b" } ], "metadata": { diff --git a/solutions/07_batchnorm_solution.ipynb b/solutions/07_batchnorm_solution.ipynb index a108cc1..6538a23 100644 --- a/solutions/07_batchnorm_solution.ipynb +++ b/solutions/07_batchnorm_solution.ipynb @@ -5,7 +5,7 @@ "id": "ffd42526", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n", "# 🟑 Solution: Implement BatchNorm\n", "\n", "Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates." @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-d351072ade14" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "outputs": [], "source": [ "import torch" - ] + ], + "id": "cell-5d5e4a552d3b" }, { "cell_type": "code", @@ -113,7 +115,8 @@ "source": [ "from torch_judge import check\n", "check('batchnorm')" - ] + ], + "id": "cell-d82b40692367" } ], "metadata": { diff --git a/solutions/08_rmsnorm_solution.ipynb b/solutions/08_rmsnorm_solution.ipynb index 0d58056..300c7e2 100644 --- a/solutions/08_rmsnorm_solution.ipynb +++ b/solutions/08_rmsnorm_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n", "# 🟑 Solution: Implement RMSNorm\n", "\n", "Reference solution for Root Mean Square Normalization." ], - "outputs": [] + "id": "cell-57b348142d0b" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e6f3d2d28e19" }, { "cell_type": "code", @@ -45,7 +46,8 @@ "import torch" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-94d24134aa71" }, { "cell_type": "code", @@ -58,7 +60,8 @@ " return x / rms * weight" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-af5101f0d85b" }, { "cell_type": "code", @@ -69,7 +72,8 @@ "print('RMS of output:', out.pow(2).mean(dim=-1).sqrt())" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a34a6cbd5af6" }, { "cell_type": "code", @@ -79,7 +83,8 @@ "check('rmsnorm')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a29bcc88c41f" } ] } diff --git a/solutions/09_causal_attention_solution.ipynb b/solutions/09_causal_attention_solution.ipynb index 995bcfb..52cf154 100644 --- a/solutions/09_causal_attention_solution.ipynb +++ b/solutions/09_causal_attention_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n", "# πŸ”΄ Solution: Causal Self-Attention\n", "\n", "Reference solution β€” softmax attention with an upper-triangular mask." ], - "outputs": [] + "id": "cell-c1de2705a1a5" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-142ed06bfb76" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-00d287977000" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " return torch.bmm(weights, V)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-288ac0183691" }, { "cell_type": "code", @@ -79,7 +82,8 @@ "print(\"Pos 0 == V[0]?\", torch.allclose(out[:, 0], V[:, 0], atol=1e-5))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ce0ed55dc29a" }, { "cell_type": "code", @@ -89,7 +93,8 @@ "check('causal_attention')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5aae090f98f0" } ] } diff --git a/solutions/10_gqa_solution.ipynb b/solutions/10_gqa_solution.ipynb index fe0c545..edd8512 100644 --- a/solutions/10_gqa_solution.ipynb +++ b/solutions/10_gqa_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n", "# πŸ”΄ Solution: Grouped Query Attention\n", "\n", "Reference solution for GQA β€” MHA with shared KV heads." ], - "outputs": [] + "id": "cell-2f6b8c8449d1" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-b627b5983264" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-961aabd90256" }, { "cell_type": "code", @@ -80,7 +82,8 @@ " return self.W_o(out)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-3c7151bd4bda" }, { "cell_type": "code", @@ -90,7 +93,8 @@ "print('Output:', gqa.forward(torch.randn(1, 4, 32)).shape)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-63491cf021fc" }, { "cell_type": "code", @@ -100,7 +104,8 @@ "check('gqa')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-43fba5030854" } ] } diff --git a/solutions/11_sliding_window_solution.ipynb b/solutions/11_sliding_window_solution.ipynb index 54b77aa..395b247 100644 --- a/solutions/11_sliding_window_solution.ipynb +++ b/solutions/11_sliding_window_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n", "# πŸ”΄ Solution: Sliding Window Attention\n", "\n", "Reference solution β€” softmax attention with a band mask." ], - "outputs": [] + "id": "cell-949f274c6929" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-76a1ff3fb104" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-215c73588b6d" }, { "cell_type": "code", @@ -65,7 +67,8 @@ " return torch.bmm(weights, V)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e7c1261d7cc2" }, { "cell_type": "code", @@ -75,7 +78,8 @@ "print('window=0==V?', torch.allclose(sliding_window_attention(Q,K,V,0), V, atol=1e-5))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-0bfe3313e0ec" }, { "cell_type": "code", @@ -85,7 +89,8 @@ "check('sliding_window')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-c32446d3d2ce" } ] } diff --git a/solutions/12_linear_attention_solution.ipynb b/solutions/12_linear_attention_solution.ipynb index a16ec27..d3142af 100644 --- a/solutions/12_linear_attention_solution.ipynb +++ b/solutions/12_linear_attention_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n", "# πŸ”΄ Solution: Linear Self-Attention\n", "\n", "Reference solution β€” kernel-based attention with elu+1 feature map." ], - "outputs": [] + "id": "cell-e6d1a2463446" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-8d5499280f34" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import torch.nn.functional as F" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e0c65ba3811c" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " return num / (den + 1e-6)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5de8a4fccb92" }, { "cell_type": "code", @@ -74,7 +77,8 @@ "print('Shape:', linear_attention(Q,K,V).shape)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-08049c1fc1e9" }, { "cell_type": "code", @@ -84,7 +88,8 @@ "check('linear_attention')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-0c691bb3dda2" } ] } diff --git a/solutions/13_gpt2_block_solution.ipynb b/solutions/13_gpt2_block_solution.ipynb index ea77036..70c4e80 100644 --- a/solutions/13_gpt2_block_solution.ipynb +++ b/solutions/13_gpt2_block_solution.ipynb @@ -17,12 +17,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n", "# πŸ”΄ Solution: GPT-2 Transformer Block\n", "\n", "Reference solution β€” pre-norm, causal self-attention, 4x MLP with GELU." ], - "outputs": [] + "id": "cell-f139f1687651" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5797a9b4d794" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ccdc588b2e88" }, { "cell_type": "code", @@ -93,7 +95,8 @@ " return x" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-b8f48d2d0577" }, { "cell_type": "code", @@ -104,7 +107,8 @@ "print('Params:', sum(p.numel() for p in block.parameters()))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-0bd03e9b2d4f" }, { "cell_type": "code", @@ -114,7 +118,8 @@ "check('gpt2_block')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5f28469af751" } ] } diff --git a/solutions/14_kv_cache_solution.ipynb b/solutions/14_kv_cache_solution.ipynb index 650149d..fdcf7e0 100644 --- a/solutions/14_kv_cache_solution.ipynb +++ b/solutions/14_kv_cache_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n", "# πŸ”΄ Solution: KV Cache Attention\n", "\n", "Reference solution β€” multi-head attention with KV caching for autoregressive inference." ], - "outputs": [] + "id": "cell-83ea25e0b178" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5215aed2af81" }, { "cell_type": "code", @@ -34,7 +35,8 @@ "import torch.nn as nn\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-40f82d0a853e" }, { "cell_type": "code", @@ -84,7 +86,8 @@ " out = self.W_o(attn.transpose(1, 2).contiguous().view(B, S_new, -1))\n", " return out, new_cache" ], - "execution_count": null + "execution_count": null, + "id": "cell-c538127a36f3" }, { "cell_type": "code", @@ -106,7 +109,8 @@ "print('Match:', torch.allclose(full_out, inc_out, atol=1e-5))\n", "print('Final cache K shape:', cache[0].shape)" ], - "execution_count": null + "execution_count": null, + "id": "cell-c96368b52bdf" }, { "cell_type": "code", @@ -116,7 +120,8 @@ "from torch_judge import check\n", "check('kv_cache')" ], - "execution_count": null + "execution_count": null, + "id": "cell-f62c94db448c" } ], "metadata": { @@ -131,5 +136,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/15_mlp_solution.ipynb b/solutions/15_mlp_solution.ipynb index ae3759b..1562319 100644 --- a/solutions/15_mlp_solution.ipynb +++ b/solutions/15_mlp_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n", "# 🟠 Solution: SwiGLU MLP\n", "\n", "Reference solution β€” gated feed-forward network used in LLaMA, Mistral, and PaLM." ], - "outputs": [] + "id": "cell-327c674b09a3" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-8a670c67bd2d" }, { "cell_type": "code", @@ -34,7 +35,8 @@ "import torch.nn as nn\n", "import torch.nn.functional as F" ], - "execution_count": null + "execution_count": null, + "id": "cell-90e60fb22984" }, { "cell_type": "code", @@ -53,7 +55,8 @@ " def forward(self, x):\n", " return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x))" ], - "execution_count": null + "execution_count": null, + "id": "cell-fa2b06c62690" }, { "cell_type": "code", @@ -65,7 +68,8 @@ "print('Output:', mlp(x).shape)\n", "print('Params:', sum(p.numel() for p in mlp.parameters()))" ], - "execution_count": null + "execution_count": null, + "id": "cell-0a7f6956911d" }, { "cell_type": "code", @@ -75,7 +79,8 @@ "from torch_judge import check\n", "check('mlp')" ], - "execution_count": null + "execution_count": null, + "id": "cell-3c2ba9d27e2a" } ], "metadata": { @@ -90,5 +95,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/16_cross_entropy_solution.ipynb b/solutions/16_cross_entropy_solution.ipynb index a8945b0..fd2fd80 100644 --- a/solutions/16_cross_entropy_solution.ipynb +++ b/solutions/16_cross_entropy_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n", "# Solution: Cross-Entropy Loss\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-2fb3846edd6b" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-810fb0af01e1" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-c43c9eb70ab8" }, { "cell_type": "code", @@ -45,7 +47,8 @@ " log_probs = logits - torch.logsumexp(logits, dim=-1, keepdim=True)\n", " return -log_probs[torch.arange(targets.shape[0]), targets].mean()" ], - "execution_count": null + "execution_count": null, + "id": "cell-165a128e570b" }, { "cell_type": "code", @@ -58,7 +61,8 @@ "print('Loss:', cross_entropy_loss(logits, targets).item())\n", "print('Ref: ', torch.nn.functional.cross_entropy(logits, targets).item())" ], - "execution_count": null + "execution_count": null, + "id": "cell-ec2240ea845a" }, { "cell_type": "code", @@ -68,7 +72,8 @@ "from torch_judge import check\n", "check('cross_entropy')" ], - "execution_count": null + "execution_count": null, + "id": "cell-8c340ab8a582" } ], "metadata": { @@ -83,5 +88,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/17_dropout_solution.ipynb b/solutions/17_dropout_solution.ipynb index 1ce4b5d..774c200 100644 --- a/solutions/17_dropout_solution.ipynb +++ b/solutions/17_dropout_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n", "# Solution: Implement Dropout\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-3c3f4a26edc9" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ffc4fae24ddd" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-ca67bed3263d" }, { "cell_type": "code", @@ -53,7 +55,8 @@ " mask = (torch.rand_like(x) > self.p).float()\n", " return x * mask / (1 - self.p)" ], - "execution_count": null + "execution_count": null, + "id": "cell-0e5082346fc8" }, { "cell_type": "code", @@ -68,7 +71,8 @@ "d.eval()\n", "print('Eval: ', d(x))" ], - "execution_count": null + "execution_count": null, + "id": "cell-532dae90c27a" }, { "cell_type": "code", @@ -78,7 +82,8 @@ "from torch_judge import check\n", "check('dropout')" ], - "execution_count": null + "execution_count": null, + "id": "cell-ed3775a0dd83" } ], "metadata": { @@ -93,5 +98,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/18_embedding_solution.ipynb b/solutions/18_embedding_solution.ipynb index d1330e0..61ae34b 100644 --- a/solutions/18_embedding_solution.ipynb +++ b/solutions/18_embedding_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n", "# Solution: Embedding Layer\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-afda4068c60e" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-42988ce77d79" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-3c6f7bf0ce17" }, { "cell_type": "code", @@ -50,7 +52,8 @@ " def forward(self, indices):\n", " return self.weight[indices]" ], - "execution_count": null + "execution_count": null, + "id": "cell-a1ed06c0a969" }, { "cell_type": "code", @@ -63,7 +66,8 @@ "print('Output shape:', emb(idx).shape)\n", "print('Matches manual:', torch.equal(emb(idx)[0], emb.weight[0]))" ], - "execution_count": null + "execution_count": null, + "id": "cell-d2d8eec1e56d" }, { "cell_type": "code", @@ -73,7 +77,8 @@ "from torch_judge import check\n", "check('embedding')" ], - "execution_count": null + "execution_count": null, + "id": "cell-d8dc2ce4fddb" } ], "metadata": { @@ -88,5 +93,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/19_gelu_solution.ipynb b/solutions/19_gelu_solution.ipynb index 368e713..176e488 100644 --- a/solutions/19_gelu_solution.ipynb +++ b/solutions/19_gelu_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n", "# Solution: GELU Activation\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-aedbd04a46e8" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-1a0a9aa4bb13" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-9df3ad00c04d" }, { "cell_type": "code", @@ -45,7 +47,8 @@ "def my_gelu(x):\n", " return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))" ], - "execution_count": null + "execution_count": null, + "id": "cell-996389dd78fd" }, { "cell_type": "code", @@ -57,7 +60,8 @@ "print('Output:', my_gelu(x))\n", "print('Ref: ', torch.nn.functional.gelu(x))" ], - "execution_count": null + "execution_count": null, + "id": "cell-b26eb8ba6427" }, { "cell_type": "code", @@ -67,7 +71,8 @@ "from torch_judge import check\n", "check('gelu')" ], - "execution_count": null + "execution_count": null, + "id": "cell-8587c88d70fe" } ], "metadata": { @@ -82,5 +87,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/20_weight_init_solution.ipynb b/solutions/20_weight_init_solution.ipynb index c8a63df..2625d64 100644 --- a/solutions/20_weight_init_solution.ipynb +++ b/solutions/20_weight_init_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n", "# Solution: Kaiming Initialization\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-b51d57aa98eb" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-233cc1bc04db" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-0f3acbf9d368" }, { "cell_type": "code", @@ -49,7 +51,8 @@ " weight.normal_(0, std)\n", " return weight" ], - "execution_count": null + "execution_count": null, + "id": "cell-6a917ca0d9c3" }, { "cell_type": "code", @@ -63,7 +66,8 @@ "print(f'Mean: {w.mean():.4f} (expect ~0)')\n", "print(f'Std: {w.std():.4f} (expect {math.sqrt(2/512):.4f})')" ], - "execution_count": null + "execution_count": null, + "id": "cell-54ff04551f33" }, { "cell_type": "code", @@ -73,7 +77,8 @@ "from torch_judge import check\n", "check('weight_init')" ], - "execution_count": null + "execution_count": null, + "id": "cell-f5aeb33a9c33" } ], "metadata": { @@ -88,5 +93,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/21_gradient_clipping_solution.ipynb b/solutions/21_gradient_clipping_solution.ipynb index 0bfce24..1215192 100644 --- a/solutions/21_gradient_clipping_solution.ipynb +++ b/solutions/21_gradient_clipping_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n", "# Solution: Gradient Norm Clipping\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-186fcd826cfb" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-fee049700f77" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-c5983d553399" }, { "cell_type": "code", @@ -50,7 +52,8 @@ " p.grad.mul_(clip_coef)\n", " return total_norm.item()" ], - "execution_count": null + "execution_count": null, + "id": "cell-64232e18a698" }, { "cell_type": "code", @@ -65,7 +68,8 @@ "print('After: ', p.grad.norm().item())\n", "print('Returned:', orig)" ], - "execution_count": null + "execution_count": null, + "id": "cell-97b9bb9b160c" }, { "cell_type": "code", @@ -75,7 +79,8 @@ "from torch_judge import check\n", "check('gradient_clipping')" ], - "execution_count": null + "execution_count": null, + "id": "cell-e2b40dd48e5f" } ], "metadata": { @@ -90,5 +95,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/22_conv2d_solution.ipynb b/solutions/22_conv2d_solution.ipynb index 3a49c5f..02cdd33 100644 --- a/solutions/22_conv2d_solution.ipynb +++ b/solutions/22_conv2d_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n", "# Solution: 2D Convolution\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-34100e03ec1d" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-aebb546ee54c" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn.functional as F" ], - "execution_count": null + "execution_count": null, + "id": "cell-45eb56e75d78" }, { "cell_type": "code", @@ -55,7 +57,8 @@ " out = out + bias.view(1, -1, 1, 1)\n", " return out" ], - "execution_count": null + "execution_count": null, + "id": "cell-27160d106e01" }, { "cell_type": "code", @@ -68,7 +71,8 @@ "print('Output:', my_conv2d(x, w).shape)\n", "print('Match:', torch.allclose(my_conv2d(x, w), F.conv2d(x, w), atol=1e-4))" ], - "execution_count": null + "execution_count": null, + "id": "cell-56d3b678d995" }, { "cell_type": "code", @@ -78,7 +82,8 @@ "from torch_judge import check\n", "check('conv2d')" ], - "execution_count": null + "execution_count": null, + "id": "cell-781b011cacf3" } ], "metadata": { @@ -93,5 +98,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/23_cross_attention_solution.ipynb b/solutions/23_cross_attention_solution.ipynb index bb7cc66..f9b598a 100644 --- a/solutions/23_cross_attention_solution.ipynb +++ b/solutions/23_cross_attention_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n", "# Solution: Multi-Head Cross-Attention\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-bbf14c9506bc" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-027a5a2149ee" }, { "cell_type": "code", @@ -34,7 +35,8 @@ "import torch.nn as nn\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-db9e5fb63d84" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " attn = torch.matmul(weights, v)\n", " return self.W_o(attn.transpose(1, 2).contiguous().view(B, S_q, -1))" ], - "execution_count": null + "execution_count": null, + "id": "cell-f243bbfab8d9" }, { "cell_type": "code", @@ -77,7 +80,8 @@ "x_kv = torch.randn(2, 10, 64)\n", "print('Output:', attn(x_q, x_kv).shape)" ], - "execution_count": null + "execution_count": null, + "id": "cell-bd8c57c0eb94" }, { "cell_type": "code", @@ -87,7 +91,8 @@ "from torch_judge import check\n", "check('cross_attention')" ], - "execution_count": null + "execution_count": null, + "id": "cell-cb9690103dfb" } ], "metadata": { @@ -102,5 +107,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/24_rope_solution.ipynb b/solutions/24_rope_solution.ipynb index 02829b3..2fb01e7 100644 --- a/solutions/24_rope_solution.ipynb +++ b/solutions/24_rope_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n", "# Solution: Rotary Position Embedding (RoPE)\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-c2f10e6d11ca" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-f9142459ffc2" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-10f6cc867f9f" }, { "cell_type": "code", @@ -58,7 +60,8 @@ "\n", " return rotate(q), rotate(k)" ], - "execution_count": null + "execution_count": null, + "id": "cell-115048dd87b9" }, { "cell_type": "code", @@ -72,7 +75,8 @@ "print('Shape preserved:', qr.shape == q.shape)\n", "print('Norm preserved:', torch.allclose(q.norm(dim=-1), qr.norm(dim=-1), atol=1e-4))" ], - "execution_count": null + "execution_count": null, + "id": "cell-432b8aa0e19d" }, { "cell_type": "code", @@ -82,7 +86,8 @@ "from torch_judge import check\n", "check('rope')" ], - "execution_count": null + "execution_count": null, + "id": "cell-a47fe29dcc3f" } ], "metadata": { @@ -97,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/25_flash_attention_solution.ipynb b/solutions/25_flash_attention_solution.ipynb index 31135d6..1e275ac 100644 --- a/solutions/25_flash_attention_solution.ipynb +++ b/solutions/25_flash_attention_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n", "# Solution: Flash Attention (Tiled)\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-919746012c89" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-76cb2dc5db3b" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-c06bd37bbf50" }, { "cell_type": "code", @@ -65,7 +67,8 @@ " output[:, i:i+block_size] = acc / row_sum\n", " return output" ], - "execution_count": null + "execution_count": null, + "id": "cell-d86d1c076943" }, { "cell_type": "code", @@ -81,7 +84,8 @@ "print('Shape:', out.shape)\n", "print('Max diff:', (out - ref).abs().max().item())" ], - "execution_count": null + "execution_count": null, + "id": "cell-6bad8f8fab76" }, { "cell_type": "code", @@ -91,7 +95,8 @@ "from torch_judge import check\n", "check('flash_attention')" ], - "execution_count": null + "execution_count": null, + "id": "cell-751d77421b4b" } ], "metadata": { @@ -106,5 +111,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/26_lora_solution.ipynb b/solutions/26_lora_solution.ipynb index 49e1e8c..e06a610 100644 --- a/solutions/26_lora_solution.ipynb +++ b/solutions/26_lora_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n", "# Solution: LoRA (Low-Rank Adaptation)\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-ff8e0e744e26" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-2efd4a7e5b1f" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-5cb7cfcddec4" }, { "cell_type": "code", @@ -55,7 +57,8 @@ " def forward(self, x):\n", " return self.linear(x) + (x @ self.lora_A.T @ self.lora_B.T) * self.scaling" ], - "execution_count": null + "execution_count": null, + "id": "cell-3aff23818b29" }, { "cell_type": "code", @@ -70,7 +73,8 @@ "total = sum(p.numel() for p in layer.parameters())\n", "print(f'Trainable: {trainable}/{total} ({100*trainable/total:.1f}%)')" ], - "execution_count": null + "execution_count": null, + "id": "cell-db39f1995c2e" }, { "cell_type": "code", @@ -80,7 +84,8 @@ "from torch_judge import check\n", "check('lora')" ], - "execution_count": null + "execution_count": null, + "id": "cell-aa8801cd066b" } ], "metadata": { @@ -95,5 +100,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/27_vit_patch_solution.ipynb b/solutions/27_vit_patch_solution.ipynb index 73004c4..ccb38ba 100644 --- a/solutions/27_vit_patch_solution.ipynb +++ b/solutions/27_vit_patch_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n", "# Solution: ViT Patch Embedding\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-1052469c40dd" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e059676064f7" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-407e7f22d20b" }, { "cell_type": "code", @@ -57,7 +59,8 @@ " x = x.permute(0, 2, 4, 1, 3, 5).reshape(B, n_h * n_w, C * p * p)\n", " return self.proj(x)" ], - "execution_count": null + "execution_count": null, + "id": "cell-568c4e982b4e" }, { "cell_type": "code", @@ -70,7 +73,8 @@ "print('Output:', pe(x).shape)\n", "print('Patches:', pe.num_patches)" ], - "execution_count": null + "execution_count": null, + "id": "cell-90513a00d6e2" }, { "cell_type": "code", @@ -80,7 +84,8 @@ "from torch_judge import check\n", "check('vit_patch')" ], - "execution_count": null + "execution_count": null, + "id": "cell-bd86b6064d17" } ], "metadata": { @@ -95,5 +100,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/28_moe_solution.ipynb b/solutions/28_moe_solution.ipynb index 5c1eb97..cabdbb5 100644 --- a/solutions/28_moe_solution.ipynb +++ b/solutions/28_moe_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n", "# Solution: Mixture of Experts (MoE)\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-7cb38bd21dd5" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-20f305ddbf45" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-c2b8322b82d7" }, { "cell_type": "code", @@ -70,7 +72,8 @@ " output[mask] += weights[mask, k:k+1] * self.experts[e](x_flat[mask])\n", " return output.reshape(orig_shape)" ], - "execution_count": null + "execution_count": null, + "id": "cell-6c3e1b0d0c5f" }, { "cell_type": "code", @@ -83,7 +86,8 @@ "print('Output:', moe(x).shape)\n", "print('Params:', sum(p.numel() for p in moe.parameters()))" ], - "execution_count": null + "execution_count": null, + "id": "cell-b415161dac16" }, { "cell_type": "code", @@ -93,7 +97,8 @@ "from torch_judge import check\n", "check('moe')" ], - "execution_count": null + "execution_count": null, + "id": "cell-caaff8dc98d0" } ], "metadata": { @@ -108,5 +113,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/29_adam_solution.ipynb b/solutions/29_adam_solution.ipynb index c31ae3b..18c9195 100644 --- a/solutions/29_adam_solution.ipynb +++ b/solutions/29_adam_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n", "# Solution: Adam Optimizer\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-869825a8f994" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5e26e6fd8079" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-f5375a3e3db7" }, { "cell_type": "code", @@ -68,7 +70,8 @@ " if p.grad is not None:\n", " p.grad.zero_()" ], - "execution_count": null + "execution_count": null, + "id": "cell-d9ba41b1a602" }, { "cell_type": "code", @@ -86,7 +89,8 @@ " opt.zero_grad()\n", " print(f'Step {i}: loss={loss.item():.4f}')" ], - "execution_count": null + "execution_count": null, + "id": "cell-a5facef72449" }, { "cell_type": "code", @@ -96,7 +100,8 @@ "from torch_judge import check\n", "check('adam')" ], - "execution_count": null + "execution_count": null, + "id": "cell-bd60a84cc521" } ], "metadata": { @@ -111,5 +116,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/30_cosine_lr_solution.ipynb b/solutions/30_cosine_lr_solution.ipynb index 74924cd..5428eca 100644 --- a/solutions/30_cosine_lr_solution.ipynb +++ b/solutions/30_cosine_lr_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n", "# Solution: Cosine LR Scheduler with Warmup\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-bc8e2d969538" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a84f751d833a" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-b8c396649b59" }, { "cell_type": "code", @@ -49,7 +51,8 @@ " progress = (step - warmup_steps) / (total_steps - warmup_steps)\n", " return min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * progress))" ], - "execution_count": null + "execution_count": null, + "id": "cell-43b9d284431c" }, { "cell_type": "code", @@ -60,7 +63,8 @@ "lrs = [cosine_lr_schedule(i, 100, 10, 0.001) for i in range(101)]\n", "print(f'Start: {lrs[0]:.6f}, Warmup end: {lrs[10]:.6f}, Mid: {lrs[55]:.6f}, End: {lrs[100]:.6f}')" ], - "execution_count": null + "execution_count": null, + "id": "cell-8c62685b9faa" }, { "cell_type": "code", @@ -70,7 +74,8 @@ "from torch_judge import check\n", "check('cosine_lr')" ], - "execution_count": null + "execution_count": null, + "id": "cell-d0d8797a08ee" } ], "metadata": { @@ -85,5 +90,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/31_gradient_accumulation_solution.ipynb b/solutions/31_gradient_accumulation_solution.ipynb index c289074..8cb372b 100644 --- a/solutions/31_gradient_accumulation_solution.ipynb +++ b/solutions/31_gradient_accumulation_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n", "# Solution: Gradient Accumulation\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-aa4172326b18" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-775aa8fec4c2" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-9c29cf9fa41a" }, { "cell_type": "code", @@ -53,7 +55,8 @@ " optimizer.step()\n", " return total_loss" ], - "execution_count": null + "execution_count": null, + "id": "cell-52e5829eeaff" }, { "cell_type": "code", @@ -67,7 +70,8 @@ " [(torch.randn(2, 4), torch.randn(2, 2)) for _ in range(4)])\n", "print('Accumulated loss:', loss)" ], - "execution_count": null + "execution_count": null, + "id": "cell-5745687ce4a4" }, { "cell_type": "code", @@ -77,7 +81,8 @@ "from torch_judge import check\n", "check('gradient_accumulation')" ], - "execution_count": null + "execution_count": null, + "id": "cell-1269a4b13577" } ], "metadata": { @@ -92,5 +97,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/32_topk_sampling_solution.ipynb b/solutions/32_topk_sampling_solution.ipynb index 31de067..3c3f6d3 100644 --- a/solutions/32_topk_sampling_solution.ipynb +++ b/solutions/32_topk_sampling_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n", "# Solution: Top-k / Top-p Sampling\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-08bed1e05d70" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e626587f927a" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-138508e1d532" }, { "cell_type": "code", @@ -56,7 +58,8 @@ " probs = torch.softmax(logits, dim=-1)\n", " return torch.multinomial(probs, 1).item()" ], - "execution_count": null + "execution_count": null, + "id": "cell-d89d26963ccf" }, { "cell_type": "code", @@ -68,7 +71,8 @@ "print('top_k=1:', sample_top_k_top_p(logits.clone(), top_k=1))\n", "print('top_p=0.5:', sample_top_k_top_p(logits.clone(), top_p=0.5))" ], - "execution_count": null + "execution_count": null, + "id": "cell-4d5635dc6b00" }, { "cell_type": "code", @@ -78,7 +82,8 @@ "from torch_judge import check\n", "check('topk_sampling')" ], - "execution_count": null + "execution_count": null, + "id": "cell-b0c1280f2d32" } ], "metadata": { @@ -93,5 +98,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/33_beam_search_solution.ipynb b/solutions/33_beam_search_solution.ipynb index 969edfa..e27d876 100644 --- a/solutions/33_beam_search_solution.ipynb +++ b/solutions/33_beam_search_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n", "# Solution: Beam Search Decoding\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-1ffe13920654" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a17595909cb7" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-b34e0b7c3141" }, { "cell_type": "code", @@ -62,7 +64,8 @@ " all_seqs.sort(key=lambda x: x[0], reverse=True)\n", " return all_seqs[0][1]" ], - "execution_count": null + "execution_count": null, + "id": "cell-f4d22eb16dd7" }, { "cell_type": "code", @@ -77,7 +80,8 @@ "seq = beam_search(simple_fn, start_token=0, max_len=5, beam_width=2, eos_token=4)\n", "print('Sequence:', seq)" ], - "execution_count": null + "execution_count": null, + "id": "cell-35e190973e09" }, { "cell_type": "code", @@ -87,7 +91,8 @@ "from torch_judge import check\n", "check('beam_search')" ], - "execution_count": null + "execution_count": null, + "id": "cell-cd76a2ec0f70" } ], "metadata": { @@ -102,5 +107,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/34_speculative_decoding_solution.ipynb b/solutions/34_speculative_decoding_solution.ipynb index c13674a..5912325 100644 --- a/solutions/34_speculative_decoding_solution.ipynb +++ b/solutions/34_speculative_decoding_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n", "# Solution: Speculative Decoding\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-d43846ee867a" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ed354910bfe6" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-73e3a3ec063e" }, { "cell_type": "code", @@ -60,7 +62,8 @@ " return accepted\n", " return accepted" ], - "execution_count": null + "execution_count": null, + "id": "cell-68b83cc78318" }, { "cell_type": "code", @@ -73,7 +76,8 @@ "tokens = torch.tensor([2, 5, 1, 8])\n", "print('Perfect draft:', speculative_decode(probs, probs, tokens))" ], - "execution_count": null + "execution_count": null, + "id": "cell-732383d0709d" }, { "cell_type": "code", @@ -83,7 +87,8 @@ "from torch_judge import check\n", "check('speculative_decoding')" ], - "execution_count": null + "execution_count": null, + "id": "cell-940e6c72e58e" } ], "metadata": { @@ -98,5 +103,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/35_bpe_solution.ipynb b/solutions/35_bpe_solution.ipynb index 1b0d1e5..4472a2d 100644 --- a/solutions/35_bpe_solution.ipynb +++ b/solutions/35_bpe_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n", "# Solution: Byte-Pair Encoding (BPE)\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-bc2eec5fe687" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-0073db25e40f" }, { "cell_type": "code", @@ -32,7 +33,8 @@ "source": [ "# No imports needed" ], - "execution_count": null + "execution_count": null, + "id": "cell-dd99ec77f8a0" }, { "cell_type": "code", @@ -89,7 +91,8 @@ " all_tokens.extend(symbols)\n", " return all_tokens" ], - "execution_count": null + "execution_count": null, + "id": "cell-dc18695aafd1" }, { "cell_type": "code", @@ -102,7 +105,8 @@ "print('Merges:', bpe.merges)\n", "print('Encode:', bpe.encode('low lower newest'))" ], - "execution_count": null + "execution_count": null, + "id": "cell-d80bd6a1913e" }, { "cell_type": "code", @@ -112,7 +116,8 @@ "from torch_judge import check\n", "check('bpe')" ], - "execution_count": null + "execution_count": null, + "id": "cell-e3a37684c4f6" } ], "metadata": { @@ -127,5 +132,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/36_int8_quantization_solution.ipynb b/solutions/36_int8_quantization_solution.ipynb index 5a5e3ec..6b19ca3 100644 --- a/solutions/36_int8_quantization_solution.ipynb +++ b/solutions/36_int8_quantization_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n", "# Solution: INT8 Quantized Linear\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-c3d51ae293a7" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-3b7204d787e7" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-addc5b1d20ea" }, { "cell_type": "code", @@ -58,7 +60,8 @@ " out = out + self.bias\n", " return out" ], - "execution_count": null + "execution_count": null, + "id": "cell-6c065dfe4d16" }, { "cell_type": "code", @@ -72,7 +75,8 @@ "print('Weight dtype:', q.weight_int8.dtype)\n", "print('Compression: float32 -> int8 = 4x')" ], - "execution_count": null + "execution_count": null, + "id": "cell-1301564ac769" }, { "cell_type": "code", @@ -82,7 +86,8 @@ "from torch_judge import check\n", "check('int8_quantization')" ], - "execution_count": null + "execution_count": null, + "id": "cell-731a5c4dcd89" } ], "metadata": { @@ -97,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/37_dpo_loss_solution.ipynb b/solutions/37_dpo_loss_solution.ipynb index 0606b84..ca8edfc 100644 --- a/solutions/37_dpo_loss_solution.ipynb +++ b/solutions/37_dpo_loss_solution.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n", "# Solution: DPO (Direct Preference Optimization) Loss\n", "\n", "Reference solution." ], - "outputs": [] + "id": "cell-7143facb4472" }, { "cell_type": "code", @@ -23,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-f1b8f6b079d6" }, { "cell_type": "code", @@ -33,7 +34,8 @@ "import torch\n", "import torch.nn.functional as F" ], - "execution_count": null + "execution_count": null, + "id": "cell-2e84e0de7215" }, { "cell_type": "code", @@ -48,7 +50,8 @@ " rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)\n", " return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()" ], - "execution_count": null + "execution_count": null, + "id": "cell-539ed9540d9b" }, { "cell_type": "code", @@ -62,7 +65,8 @@ "ref_r = torch.tensor([-1.0, -1.0])\n", "print('Loss:', dpo_loss(chosen, rejected, ref_c, ref_r, beta=0.1).item())" ], - "execution_count": null + "execution_count": null, + "id": "cell-1652483434b7" }, { "cell_type": "code", @@ -72,7 +76,8 @@ "from torch_judge import check\n", "check('dpo_loss')" ], - "execution_count": null + "execution_count": null, + "id": "cell-ef88c30f1c7a" } ], "metadata": { @@ -87,5 +92,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/solutions/38_grpo_loss_solution.ipynb b/solutions/38_grpo_loss_solution.ipynb index 05ce2f6..6910cad 100644 --- a/solutions/38_grpo_loss_solution.ipynb +++ b/solutions/38_grpo_loss_solution.ipynb @@ -4,11 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n", "# Solution: GRPO (Group Relative Policy Optimization) Loss\n", "\n", "Reference solution." - ] + ], + "id": "cell-c3c37a3f40b5" }, { "cell_type": "code", @@ -22,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5291620c23dc" }, { "cell_type": "code", @@ -33,7 +35,8 @@ "import torch\n", "import torch.nn.functional as F\n", "from torch import Tensor" - ] + ], + "id": "cell-13d027881df4" }, { "cell_type": "code", @@ -67,7 +70,8 @@ "\n", " # GRPO objective: -E[A_i * logpi_i]\n", " return -(advantages_detached * logps).mean()\n" - ] + ], + "id": "cell-d878fd547e7a" }, { "cell_type": "code", @@ -80,7 +84,8 @@ "rewards = torch.tensor([1.0, 0.8, 0.2, 0.0])\n", "group_ids = torch.tensor([0, 0, 1, 1])\n", "print('Loss:', grpo_loss(logps, rewards, group_ids).item())" - ] + ], + "id": "cell-289d9d048e3c" }, { "cell_type": "code", @@ -90,7 +95,8 @@ "source": [ "from torch_judge import check\n", "check('grpo_loss')" - ] + ], + "id": "cell-d696b1c08d9b" } ], "metadata": { diff --git a/solutions/39_ppo_loss_solution.ipynb b/solutions/39_ppo_loss_solution.ipynb index 1818a32..ec7765c 100644 --- a/solutions/39_ppo_loss_solution.ipynb +++ b/solutions/39_ppo_loss_solution.ipynb @@ -4,11 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n", "# Solution: PPO Clipped Loss\n", "\n", "Reference solution for the PPO clipped surrogate loss task.\n" - ] + ], + "id": "cell-73977ce3fc1b" }, { "cell_type": "code", @@ -22,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-21f4c0dae2bc" }, { "cell_type": "code", @@ -33,7 +35,8 @@ "import torch\n", "import torch.nn.functional as F\n", "from torch import Tensor\n" - ] + ], + "id": "cell-958e8ccd84ee" }, { "cell_type": "code", @@ -65,7 +68,8 @@ "\n", " # PPO objective: negative mean of the more conservative objective\n", " return -torch.min(unclipped, clipped).mean()\n" - ] + ], + "id": "cell-29d7618b2b78" }, { "cell_type": "code", @@ -78,7 +82,8 @@ "old_logps = torch.tensor([0.0, -0.1, -0.5, -0.5])\n", "advantages = torch.tensor([1.0, -1.0, 0.5, -0.5])\n", "print('Loss:', ppo_loss(new_logps, old_logps, advantages, clip_ratio=0.2))\n" - ] + ], + "id": "cell-c46aa82b9f10" }, { "cell_type": "code", @@ -88,7 +93,8 @@ "source": [ "from torch_judge import check\n", "check('ppo_loss')\n" - ] + ], + "id": "cell-92aced739aa3" } ], "metadata": { diff --git a/solutions/40_linear_regression_solution.ipynb b/solutions/40_linear_regression_solution.ipynb index a745293..bcf1e04 100644 --- a/solutions/40_linear_regression_solution.ipynb +++ b/solutions/40_linear_regression_solution.ipynb @@ -4,11 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n", "# 🟑 Solution: Linear Regression\n", "\n", "Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches." - ] + ], + "id": "cell-15853200c649" }, { "cell_type": "code", @@ -22,7 +23,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-8a24230361ae" }, { "cell_type": "code", @@ -32,7 +34,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-08d16fb79bde" }, { "cell_type": "code", @@ -89,7 +92,8 @@ " b = layer.bias.data.squeeze(0) # scalar ()\n", " return w, b" ], - "execution_count": null + "execution_count": null, + "id": "cell-001be78ef74a" }, { "cell_type": "code", @@ -110,7 +114,8 @@ " print(f\"{name:13s} w={w.tolist()} b={b.item():.4f}\")\n", "print(f\"{'True':13s} w={true_w.tolist()} b=3.0000\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-904aa56cf10b" }, { "cell_type": "code", @@ -121,7 +126,8 @@ "from torch_judge import check\n", "check(\"linear_regression\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-4b31f4b8ca0a" } ], "metadata": { @@ -136,5 +142,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/00_welcome.ipynb b/templates/00_welcome.ipynb index c3498f5..868418c 100644 --- a/templates/00_welcome.ipynb +++ b/templates/00_welcome.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/00_welcome.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/00_welcome.ipynb)\n", "\n", "# πŸ”₯ TorchCode β€” PyTorch Operator Practice\n", "\n", @@ -33,8 +33,9 @@ "\n", "## Quick Start\n", "\n", - "πŸ“– **Reference solutions in Colab**: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) β€” Start with ReLU. Or use the **Colab** links in the table below for each solution." - ] + "πŸ“– **Reference solutions in Colab**: [Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) β€” Start with ReLU. Or use the **Colab** links in the table below for each solution." + ], + "id": "cell-d30023ba4676" }, { "cell_type": "code", @@ -48,7 +49,8 @@ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n", "except ImportError:\n", " pass\n" - ] + ], + "id": "cell-7abf370eea8e" }, { "cell_type": "code", @@ -58,7 +60,8 @@ "source": [ "from torch_judge import status\n", "status()" - ] + ], + "id": "cell-cd409a5c25d0" }, { "cell_type": "markdown", @@ -145,7 +148,8 @@ "check(\"relu\") # Judge your implementation\n", "hint(\"causal_attention\") # Get a hint\n", "```" - ] + ], + "id": "cell-5f2cffdcaf3d" } ], "metadata": { @@ -160,5 +164,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/01_relu.ipynb b/templates/01_relu.ipynb index abe82e0..d1f61d7 100644 --- a/templates/01_relu.ipynb +++ b/templates/01_relu.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb)\n", "\n", "# 🟒 Easy: Implement ReLU\n", "\n", @@ -28,7 +28,7 @@ "Output: tensor([ 0., 0., 0., 1., 2.])\n", "```" ], - "outputs": [] + "id": "cell-488e24a3b562" }, { "cell_type": "code", @@ -42,7 +42,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-2115a17fb167" }, { "cell_type": "code", @@ -51,7 +52,8 @@ "import torch" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-1846b083bfa6" }, { "cell_type": "code", @@ -64,7 +66,8 @@ "" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-8dbfa8b0061b" }, { "cell_type": "code", @@ -77,7 +80,8 @@ "print(\"Shape: \", relu(x).shape)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ebe90dc3c828" }, { "cell_type": "code", @@ -88,7 +92,8 @@ "check(\"relu\")" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-fc4b8825c5ee" } ], "metadata": { @@ -111,5 +116,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/02_softmax.ipynb b/templates/02_softmax.ipynb index b8ed6f8..5170d95 100644 --- a/templates/02_softmax.ipynb +++ b/templates/02_softmax.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb)\n", "\n", "# 🟒 Easy: Implement Softmax\n", "\n", @@ -28,7 +28,7 @@ "Output: tensor([0.0900, 0.2447, 0.6652]) # sums to 1.0\n", "```" ], - "outputs": [] + "id": "cell-ee62895f3be1" }, { "cell_type": "code", @@ -42,7 +42,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-bc1c7e2c95a0" }, { "cell_type": "code", @@ -51,7 +52,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-5b11467cc2d8" }, { "cell_type": "code", @@ -63,7 +65,8 @@ "def my_softmax(x: torch.Tensor, dim: int = -1) -> torch.Tensor:\n", " pass # Replace this" ], - "execution_count": null + "execution_count": null, + "id": "cell-235a94a146f9" }, { "cell_type": "code", @@ -76,7 +79,8 @@ "print(\"Sum: \", my_softmax(x, dim=-1).sum()) # should be ~1.0\n", "print(\"Ref: \", torch.softmax(x, dim=-1))" ], - "execution_count": null + "execution_count": null, + "id": "cell-1c9f8f50df2b" }, { "cell_type": "code", @@ -87,7 +91,8 @@ "from torch_judge import check\n", "check(\"softmax\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-4da14d5295f6" } ], "metadata": { @@ -102,5 +107,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/03_linear.ipynb b/templates/03_linear.ipynb index 29e4664..8005128 100644 --- a/templates/03_linear.ipynb +++ b/templates/03_linear.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb)\n", "\n", "# 🟑 Medium: Simple Linear Layer\n", "\n", @@ -24,7 +24,7 @@ "- `forward(x)` computes `x @ W^T + b`\n", "- Do **NOT** use `torch.nn.Linear`" ], - "outputs": [] + "id": "cell-11eb928f56bb" }, { "cell_type": "code", @@ -38,7 +38,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-78f5c77ff8e0" }, { "cell_type": "code", @@ -48,7 +49,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-6cc01b59b785" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " def forward(self, x: torch.Tensor) -> torch.Tensor:\n", " pass # Compute y = x @ W^T + b" ], - "execution_count": null + "execution_count": null, + "id": "cell-d8adc3a9e492" }, { "cell_type": "code", @@ -80,7 +83,8 @@ "y = layer.forward(x)\n", "print(\"Output shape:\", y.shape) # should be (2, 4)" ], - "execution_count": null + "execution_count": null, + "id": "cell-f2c84b5f5b6a" }, { "cell_type": "code", @@ -91,7 +95,8 @@ "from torch_judge import check\n", "check(\"linear\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-31cc2455bc19" } ], "metadata": { @@ -106,5 +111,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/04_layernorm.ipynb b/templates/04_layernorm.ipynb index 89f030b..dc35b41 100644 --- a/templates/04_layernorm.ipynb +++ b/templates/04_layernorm.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb)\n", "\n", "# 🟑 Medium: Implement LayerNorm\n", "\n", @@ -30,7 +30,7 @@ "- Normalize over the last dimension only\n", "- Must support autograd" ], - "outputs": [] + "id": "cell-13dd55cba342" }, { "cell_type": "code", @@ -44,7 +44,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-22f090de81a2" }, { "cell_type": "code", @@ -53,7 +54,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-333ba7d0eadb" }, { "cell_type": "code", @@ -65,7 +67,8 @@ "def my_layer_norm(x, gamma, beta, eps=1e-5):\n", " pass # Replace this" ], - "execution_count": null + "execution_count": null, + "id": "cell-591fa086ab56" }, { "cell_type": "code", @@ -84,7 +87,8 @@ "print(\"Your output std: \", out.std(dim=-1)) # should be ~1\n", "print(\"Match ref? \", torch.allclose(out, ref, atol=1e-4))" ], - "execution_count": null + "execution_count": null, + "id": "cell-6ee009e37bd9" }, { "cell_type": "code", @@ -95,7 +99,8 @@ "from torch_judge import check\n", "check(\"layernorm\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-e2a35caadafb" } ], "metadata": { @@ -110,5 +115,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/05_attention.ipynb b/templates/05_attention.ipynb index 047243e..f0a50a1 100644 --- a/templates/05_attention.ipynb +++ b/templates/05_attention.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb)\n", "\n", "# πŸ”΄ Hard: Softmax Attention\n", "\n", @@ -27,7 +27,8 @@ "- You **may** use `torch.softmax` and `torch.bmm`\n", "- Must support autograd\n", "- Must handle cross-attention (seq_q β‰  seq_k)" - ] + ], + "id": "cell-8a67b627466a" }, { "cell_type": "code", @@ -41,7 +42,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-bd03e7868e64" }, { "cell_type": "code", @@ -51,7 +53,8 @@ "source": [ "import torch\n", "import math" - ] + ], + "id": "cell-7acc2da3c658" }, { "cell_type": "code", @@ -63,7 +66,8 @@ "\n", "def scaled_dot_product_attention(Q, K, V):\n", " pass # Replace this" - ] + ], + "id": "cell-8b9cfaebfaee" }, { "cell_type": "code", @@ -88,7 +92,8 @@ "V2 = torch.randn(1, 5, 32)\n", "out2 = scaled_dot_product_attention(Q2, K2, V2)\n", "print(\"Cross-attn shape:\", out2.shape) # should be (1, 3, 32)" - ] + ], + "id": "cell-7f5ed83e3b5a" }, { "cell_type": "code", @@ -99,7 +104,8 @@ "# βœ… SUBMIT\n", "from torch_judge import check\n", "check(\"attention\")" - ] + ], + "id": "cell-91157d972f57" } ], "metadata": { @@ -114,5 +120,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/06_multihead_attention.ipynb b/templates/06_multihead_attention.ipynb index 7812714..4ba8334 100644 --- a/templates/06_multihead_attention.ipynb +++ b/templates/06_multihead_attention.ipynb @@ -5,7 +5,7 @@ "id": "2bfeea64", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb)\n", "\n", "# πŸ”΄ Hard: Multi-Head Attention\n", "\n", @@ -78,7 +78,8 @@ "\n", " def forward(self, Q, K, V):\n", " pass # Implement multi-head attention" - ] + ], + "id": "cell-3a07876c936c" }, { "cell_type": "code", @@ -102,7 +103,8 @@ "V = torch.randn(1, 7, 32)\n", "out2 = mha.forward(Q, K, V)\n", "print(\"Cross-attn shape:\", out2.shape) # (1, 3, 32)" - ] + ], + "id": "cell-c902eedf2923" }, { "cell_type": "code", @@ -113,7 +115,8 @@ "# βœ… SUBMIT\n", "from torch_judge import check\n", "check(\"mha\")" - ] + ], + "id": "cell-40d9694d03c5" } ], "metadata": { diff --git a/templates/07_batchnorm.ipynb b/templates/07_batchnorm.ipynb index 6199e93..4f1645c 100644 --- a/templates/07_batchnorm.ipynb +++ b/templates/07_batchnorm.ipynb @@ -5,7 +5,7 @@ "id": "89fd15cb", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n", "\n", "# 🟑 Medium: Implement BatchNorm\n", "\n", @@ -55,7 +55,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a1d5510567a2" }, { "cell_type": "code", @@ -64,7 +65,8 @@ "outputs": [], "source": [ "import torch" - ] + ], + "id": "cell-b96e8321fb29" }, { "cell_type": "code", @@ -126,7 +128,8 @@ "# βœ… SUBMIT\n", "from torch_judge import check\n", "check(\"batchnorm\")" - ] + ], + "id": "cell-8776a5449c11" } ], "metadata": { diff --git a/templates/08_rmsnorm.ipynb b/templates/08_rmsnorm.ipynb index facfbf5..44bb03e 100644 --- a/templates/08_rmsnorm.ipynb +++ b/templates/08_rmsnorm.ipynb @@ -17,7 +17,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb)\n", "\n", "# 🟑 Medium: Implement RMSNorm\n", "\n", @@ -36,7 +36,7 @@ "- Normalize over `dim=-1`\n", "- Must support autograd" ], - "outputs": [] + "id": "cell-e12e535faa79" }, { "cell_type": "code", @@ -50,7 +50,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-2c55ef9c078d" }, { "cell_type": "code", @@ -59,7 +60,8 @@ "import torch" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-68c3e79b21f4" }, { "cell_type": "code", @@ -71,7 +73,8 @@ " pass # Replace this" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e007d3472d17" }, { "cell_type": "code", @@ -85,7 +88,8 @@ "print(\"RMS of output:\", out.pow(2).mean(dim=-1).sqrt()) # should be ~1" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-96e430eb39cd" }, { "cell_type": "code", @@ -95,7 +99,8 @@ "check('rmsnorm')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-8c428d84e93c" } ] } diff --git a/templates/09_causal_attention.ipynb b/templates/09_causal_attention.ipynb index 24bdb6c..62bb162 100644 --- a/templates/09_causal_attention.ipynb +++ b/templates/09_causal_attention.ipynb @@ -17,28 +17,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb)\n", "\n", - "# πŸ”΄ Hard: Causal Self-Attention\n", + "# Hard: Causal Self-Attention\n", "\n", - "Implement **causal (masked) self-attention** β€” the attention used in GPT-style decoders.\n", + "Implement **causal (masked) self-attention** - the attention used in GPT-style decoders.\n", "\n", "Same as softmax attention, but each position can **only attend to itself and earlier positions** (no peeking at future tokens).\n", "\n", - "$$\\text{scores}_{ij} = \\begin{cases} \\frac{Q_i \\cdot K_j}{\\sqrt{d_k}} & \\text{if } j \\le i \\\\ -\\infty & \\text{if } j > i \\end{cases}$$\n", + "For each query position `i` and key position `j`:\n", + "\n", + "- If `j <= i`, use the scaled dot product score `Q_i dot K_j / sqrt(d_k)`.\n", + "- If `j > i`, mask the score to `-inf` before softmax.\n", "\n", "### Signature\n", "```python\n", "def causal_attention(Q, K, V):\n", - " # Q, K, V: (batch, seq, d) β†’ output: (batch, seq, d_v)\n", + " # Q, K, V: (batch, seq, d) -> output: (batch, seq, d_v)\n", "```\n", "\n", "### Rules\n", "- Do **NOT** use `F.scaled_dot_product_attention`\n", - "- Position $i$ can only attend to positions $\\le i$\n", + "- Position `i` can only attend to positions `<= i`\n", "- You **may** use `torch.softmax`, `torch.bmm`, `torch.triu`" ], - "outputs": [] + "id": "cell-7d839747fec3" }, { "cell_type": "code", @@ -52,7 +55,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-dc437a6bbd84" }, { "cell_type": "code", @@ -62,25 +66,27 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-2d1e83527d76" }, { "cell_type": "code", "metadata": {}, "source": [ - "# ✏️ YOUR IMPLEMENTATION HERE\n", + "# YOUR IMPLEMENTATION HERE\n", "\n", "def causal_attention(Q, K, V):\n", " pass # Replace this" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a759e033e85f" }, { "cell_type": "code", "metadata": {}, "source": [ - "# πŸ§ͺ Debug\n", + "# Debug\n", "torch.manual_seed(0)\n", "Q = torch.randn(1, 4, 8)\n", "K = torch.randn(1, 4, 8)\n", @@ -90,7 +96,8 @@ "print(\"Pos 0 == V[0]?\", torch.allclose(out[:, 0], V[:, 0], atol=1e-5)) # should be True" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-dce8bccb8829" }, { "cell_type": "code", @@ -100,7 +107,8 @@ "check('causal_attention')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-567f615926ef" } ] } diff --git a/templates/10_gqa.ipynb b/templates/10_gqa.ipynb index 92d544d..1d38b2f 100644 --- a/templates/10_gqa.ipynb +++ b/templates/10_gqa.ipynb @@ -17,7 +17,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb)\n", "\n", "# πŸ”΄ Hard: Grouped Query Attention (GQA)\n", "\n", @@ -41,7 +41,7 @@ "- Expand KV heads with `repeat_interleave` to match Q heads\n", "- When `num_kv_heads == num_heads`, should behave like standard MHA" ], - "outputs": [] + "id": "cell-7bb0949ecab9" }, { "cell_type": "code", @@ -55,7 +55,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-a9f438b80c19" }, { "cell_type": "code", @@ -66,7 +67,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-72546cc3829f" }, { "cell_type": "code", @@ -82,7 +84,8 @@ " pass # Self-attention with grouped KV" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-afe0f28b92d2" }, { "cell_type": "code", @@ -99,7 +102,8 @@ "print(\"Output shape:\", out.shape) # (2, 6, 32)" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-d6bc0a4fd2b2" }, { "cell_type": "code", @@ -109,7 +113,8 @@ "check('gqa')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-fcded4c20296" } ] } diff --git a/templates/11_sliding_window.ipynb b/templates/11_sliding_window.ipynb index d201fb1..b9a014f 100644 --- a/templates/11_sliding_window.ipynb +++ b/templates/11_sliding_window.ipynb @@ -17,7 +17,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb)\n", "\n", "# πŸ”΄ Hard: Sliding Window Attention\n", "\n", @@ -38,7 +38,7 @@ "- `window_size=0`: only self β€” output should equal V\n", "- `window_size >= seq_len`: equivalent to full attention" ], - "outputs": [] + "id": "cell-e32a0584b338" }, { "cell_type": "code", @@ -52,7 +52,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-22f3b91fb2fb" }, { "cell_type": "code", @@ -62,7 +63,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-57792ca776ad" }, { "cell_type": "code", @@ -74,7 +76,8 @@ " pass # Replace this" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-7a409749a915" }, { "cell_type": "code", @@ -93,7 +96,8 @@ "print(\"window=0 == V?\", torch.allclose(out0, V, atol=1e-5))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-58a9801f9754" }, { "cell_type": "code", @@ -103,7 +107,8 @@ "check('sliding_window')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-b254d78dbd28" } ] } diff --git a/templates/12_linear_attention.ipynb b/templates/12_linear_attention.ipynb index 7d6ceeb..4bbb5f2 100644 --- a/templates/12_linear_attention.ipynb +++ b/templates/12_linear_attention.ipynb @@ -17,7 +17,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb)\n", "\n", "# πŸ”΄ Hard: Linear Self-Attention\n", "\n", @@ -45,7 +45,7 @@ "- Must be O(SΒ·DΒ²) β€” should run fast on long sequences\n", "- You **may** use `F.elu`" ], - "outputs": [] + "id": "cell-198932b02ac5" }, { "cell_type": "code", @@ -59,7 +59,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-6c7a263ebc92" }, { "cell_type": "code", @@ -69,7 +70,8 @@ "import torch.nn.functional as F" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5d3dbb703c26" }, { "cell_type": "code", @@ -81,7 +83,8 @@ " pass # Replace this" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-5758c217d64f" }, { "cell_type": "code", @@ -96,7 +99,8 @@ "print(\"Has NaN?\", torch.isnan(out).any().item())" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-94d00c576a69" }, { "cell_type": "code", @@ -106,7 +110,8 @@ "check('linear_attention')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-56e3495be182" } ] } diff --git a/templates/13_gpt2_block.ipynb b/templates/13_gpt2_block.ipynb index 3211781..afba13d 100644 --- a/templates/13_gpt2_block.ipynb +++ b/templates/13_gpt2_block.ipynb @@ -17,7 +17,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb)\n", "\n", "# πŸ”΄ Hard: GPT-2 Transformer Block\n", "\n", @@ -45,7 +45,7 @@ "- Pre-norm architecture (LayerNorm *before* attention and MLP)\n", "- Residual connections around both attention and MLP" ], - "outputs": [] + "id": "cell-d5cd40266298" }, { "cell_type": "code", @@ -59,7 +59,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-24b4bc91322b" }, { "cell_type": "code", @@ -70,7 +71,8 @@ "import math" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-2ba29d7983a8" }, { "cell_type": "code", @@ -87,7 +89,8 @@ " pass # Pre-norm + causal attention + MLP with residuals" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-ca7018184d17" }, { "cell_type": "code", @@ -103,7 +106,8 @@ "print(\"Params:\", sum(p.numel() for p in block.parameters()))" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-da4eea3e58b3" }, { "cell_type": "code", @@ -113,7 +117,8 @@ "check('gpt2_block')" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-111f4d94ae43" } ] } diff --git a/templates/14_kv_cache.ipynb b/templates/14_kv_cache.ipynb index a7d9e0b..b690c16 100644 --- a/templates/14_kv_cache.ipynb +++ b/templates/14_kv_cache.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb)\n", "\n", "# πŸ”΄ Hard: KV Cache Attention\n", "\n", @@ -37,7 +37,7 @@ "Decode: [t5] β†’ Q=t5, K/V=cache+t5 β†’ cache = (K_{0:5}, V_{0:5})\n", "```" ], - "outputs": [] + "id": "cell-b5d61e9b5c17" }, { "cell_type": "code", @@ -51,7 +51,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-1fceb50c7e24" }, { "cell_type": "code", @@ -62,7 +63,8 @@ "import torch.nn as nn\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-6554367058cc" }, { "cell_type": "code", @@ -84,7 +86,8 @@ " # 5. Return (output, (K_all, V_all))\n", " pass" ], - "execution_count": null + "execution_count": null, + "id": "cell-d3e7878409d1" }, { "cell_type": "code", @@ -108,7 +111,8 @@ "inc_out = torch.cat([out1, out2, out3], dim=1)\n", "print(\"Match:\", torch.allclose(full_out, inc_out, atol=1e-5))" ], - "execution_count": null + "execution_count": null, + "id": "cell-14bfb74b0635" }, { "cell_type": "code", @@ -119,7 +123,8 @@ "from torch_judge import check\n", "check('kv_cache')" ], - "execution_count": null + "execution_count": null, + "id": "cell-bf82c01bbc10" } ], "metadata": { @@ -134,5 +139,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/15_mlp.ipynb b/templates/15_mlp.ipynb index 0d238fa..ac6d6a9 100644 --- a/templates/15_mlp.ipynb +++ b/templates/15_mlp.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb)\n", "\n", "# 🟠 Medium: SwiGLU MLP\n", "\n", @@ -33,7 +33,7 @@ "the gate projection controls information flow, while the up projection provides the content.\n", "This consistently outperforms standard FFNs in practice (PaLM, LLaMA, Mistral all use it)." ], - "outputs": [] + "id": "cell-55d5c66de1fd" }, { "cell_type": "code", @@ -47,7 +47,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-7018cc74a7c2" }, { "cell_type": "code", @@ -58,7 +59,8 @@ "import torch.nn as nn\n", "import torch.nn.functional as F" ], - "execution_count": null + "execution_count": null, + "id": "cell-1c01e6c658f2" }, { "cell_type": "code", @@ -75,7 +77,8 @@ " def forward(self, x):\n", " pass # down_proj(silu(gate_proj(x)) * up_proj(x))" ], - "execution_count": null + "execution_count": null, + "id": "cell-c227aaa7f515" }, { "cell_type": "code", @@ -89,7 +92,8 @@ "print(\"Output shape:\", out.shape) # (2, 8, 64)\n", "print(\"Params:\", sum(p.numel() for p in mlp.parameters()))" ], - "execution_count": null + "execution_count": null, + "id": "cell-4b208f4bd01a" }, { "cell_type": "code", @@ -100,7 +104,8 @@ "from torch_judge import check\n", "check('mlp')" ], - "execution_count": null + "execution_count": null, + "id": "cell-af016ed9778b" } ], "metadata": { @@ -115,5 +120,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/16_cross_entropy.ipynb b/templates/16_cross_entropy.ipynb index 80b7765..5075d8f 100644 --- a/templates/16_cross_entropy.ipynb +++ b/templates/16_cross_entropy.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb)\n", "\n", "# 🟒 Easy: Cross-Entropy Loss\n", "\n", @@ -23,7 +23,7 @@ "- Do NOT use `F.cross_entropy` or `nn.CrossEntropyLoss`\n", "- Must be numerically stable (use logsumexp trick)" ], - "outputs": [] + "id": "cell-c375e29731a2" }, { "cell_type": "code", @@ -37,7 +37,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e1170630e835" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-cfdc219c1b59" }, { "cell_type": "code", @@ -58,7 +60,8 @@ "def cross_entropy_loss(logits, targets):\n", " pass # log_probs = logits - logsumexp(...)" ], - "execution_count": null + "execution_count": null, + "id": "cell-328aafbb75ac" }, { "cell_type": "code", @@ -71,7 +74,8 @@ "print('Loss:', cross_entropy_loss(logits, targets))\n", "print('Ref: ', torch.nn.functional.cross_entropy(logits, targets))" ], - "execution_count": null + "execution_count": null, + "id": "cell-701f0b3f1410" }, { "cell_type": "code", @@ -82,7 +86,8 @@ "from torch_judge import check\n", "check('cross_entropy')" ], - "execution_count": null + "execution_count": null, + "id": "cell-f02326662761" } ], "metadata": { @@ -97,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/17_dropout.ipynb b/templates/17_dropout.ipynb index d2ec346..2bbe358 100644 --- a/templates/17_dropout.ipynb +++ b/templates/17_dropout.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb)\n", "\n", "# 🟒 Easy: Implement Dropout\n", "\n", @@ -22,7 +22,7 @@ "- During **eval**: return input unchanged (identity)\n", "- Do NOT use `nn.Dropout` or `F.dropout`" ], - "outputs": [] + "id": "cell-b195fa80956b" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-44ea058d38c9" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-f94f37ecbb19" }, { "cell_type": "code", @@ -63,7 +65,8 @@ " def forward(self, x):\n", " pass" ], - "execution_count": null + "execution_count": null, + "id": "cell-856839f0607b" }, { "cell_type": "code", @@ -78,7 +81,8 @@ "d.eval()\n", "print('Eval: ', d(x))" ], - "execution_count": null + "execution_count": null, + "id": "cell-29f86a714b53" }, { "cell_type": "code", @@ -89,7 +93,8 @@ "from torch_judge import check\n", "check('dropout')" ], - "execution_count": null + "execution_count": null, + "id": "cell-be9c17b0787b" } ], "metadata": { @@ -104,5 +109,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/18_embedding.ipynb b/templates/18_embedding.ipynb index 1dc318c..ad3b088 100644 --- a/templates/18_embedding.ipynb +++ b/templates/18_embedding.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb)\n", "\n", "# 🟒 Easy: Embedding Layer\n", "\n", @@ -22,7 +22,7 @@ "- Forward: index into weight matrix β€” `weight[indices]`\n", "- Do NOT use `nn.Embedding`" ], - "outputs": [] + "id": "cell-2a2807c82e7f" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-875f47d08249" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-e312e347e7aa" }, { "cell_type": "code", @@ -63,7 +65,8 @@ " def forward(self, indices):\n", " pass" ], - "execution_count": null + "execution_count": null, + "id": "cell-22589eb5a20c" }, { "cell_type": "code", @@ -76,7 +79,8 @@ "print('Output shape:', emb(idx).shape)\n", "print('Matches manual:', torch.equal(emb(idx)[0], emb.weight[0]))" ], - "execution_count": null + "execution_count": null, + "id": "cell-687ee2396cd4" }, { "cell_type": "code", @@ -87,7 +91,8 @@ "from torch_judge import check\n", "check('embedding')" ], - "execution_count": null + "execution_count": null, + "id": "cell-a5b5d989b3e5" } ], "metadata": { @@ -102,5 +107,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/19_gelu.ipynb b/templates/19_gelu.ipynb index 613c65c..bc79535 100644 --- a/templates/19_gelu.ipynb +++ b/templates/19_gelu.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb)\n", "\n", "# 🟒 Easy: GELU Activation\n", "\n", @@ -21,7 +21,7 @@ "- Do NOT use `F.gelu`, `nn.GELU`, or `torch.nn.functional.gelu`\n", "- Use `torch.erf` for the exact version" ], - "outputs": [] + "id": "cell-4f5fb89c2573" }, { "cell_type": "code", @@ -35,7 +35,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-fd66bf0d9d1f" }, { "cell_type": "code", @@ -45,7 +46,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-f8d468af7734" }, { "cell_type": "code", @@ -57,7 +59,8 @@ "def my_gelu(x):\n", " pass" ], - "execution_count": null + "execution_count": null, + "id": "cell-b5fe89008e24" }, { "cell_type": "code", @@ -69,7 +72,8 @@ "print('Output:', my_gelu(x))\n", "print('Ref: ', torch.nn.functional.gelu(x))" ], - "execution_count": null + "execution_count": null, + "id": "cell-8862265adb2d" }, { "cell_type": "code", @@ -80,7 +84,8 @@ "from torch_judge import check\n", "check('gelu')" ], - "execution_count": null + "execution_count": null, + "id": "cell-6764a5dea69a" } ], "metadata": { @@ -95,5 +100,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/20_weight_init.ipynb b/templates/20_weight_init.ipynb index 2a35343..6470bab 100644 --- a/templates/20_weight_init.ipynb +++ b/templates/20_weight_init.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb)\n", "\n", "# 🟒 Easy: Kaiming Initialization\n", "\n", @@ -20,7 +20,7 @@ " # Returns the weight tensor\n", "```" ], - "outputs": [] + "id": "cell-c3fda7896e25" }, { "cell_type": "code", @@ -34,7 +34,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-236761a91c27" }, { "cell_type": "code", @@ -44,7 +45,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-499b086c88b6" }, { "cell_type": "code", @@ -56,7 +58,8 @@ "def kaiming_init(weight):\n", " pass # fill with normal(0, sqrt(2/fan_in))" ], - "execution_count": null + "execution_count": null, + "id": "cell-d23438bb715e" }, { "cell_type": "code", @@ -70,7 +73,8 @@ "print(f'Mean: {w.mean():.4f} (expect ~0)')\n", "print(f'Std: {w.std():.4f} (expect {math.sqrt(2/512):.4f})')" ], - "execution_count": null + "execution_count": null, + "id": "cell-809b6e52b602" }, { "cell_type": "code", @@ -81,7 +85,8 @@ "from torch_judge import check\n", "check('weight_init')" ], - "execution_count": null + "execution_count": null, + "id": "cell-b277660b87af" } ], "metadata": { @@ -96,5 +101,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/21_gradient_clipping.ipynb b/templates/21_gradient_clipping.ipynb index 4f783f8..ba837d5 100644 --- a/templates/21_gradient_clipping.ipynb +++ b/templates/21_gradient_clipping.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb)\n", "\n", "# 🟒 Easy: Gradient Norm Clipping\n", "\n", @@ -22,7 +22,7 @@ "2. If total > max_norm: scale all grads by `max_norm / total`\n", "3. Return original total norm" ], - "outputs": [] + "id": "cell-e28caf6c3582" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-b413d4367239" }, { "cell_type": "code", @@ -45,7 +46,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-21b2f0a310f7" }, { "cell_type": "code", @@ -57,7 +59,8 @@ "def clip_grad_norm(parameters, max_norm):\n", " pass # compute total norm, clip if needed, return original norm" ], - "execution_count": null + "execution_count": null, + "id": "cell-6017d76191ec" }, { "cell_type": "code", @@ -72,7 +75,8 @@ "print('After: ', p.grad.norm().item())\n", "print('Original norm:', orig)" ], - "execution_count": null + "execution_count": null, + "id": "cell-8e06f47a1286" }, { "cell_type": "code", @@ -83,7 +87,8 @@ "from torch_judge import check\n", "check('gradient_clipping')" ], - "execution_count": null + "execution_count": null, + "id": "cell-6ce4d03f65b1" } ], "metadata": { @@ -98,5 +103,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/22_conv2d.ipynb b/templates/22_conv2d.ipynb index 9f5c0cd..11e062e 100644 --- a/templates/22_conv2d.ipynb +++ b/templates/22_conv2d.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb)\n", "\n", "# 🟠 Medium: 2D Convolution\n", "\n", @@ -22,7 +22,7 @@ "- Support `stride` and `padding` parameters\n", "- `F.pad` for zero-padding is allowed" ], - "outputs": [] + "id": "cell-5e18a8e38faa" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-7270ae5fe54f" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "import torch\n", "import torch.nn.functional as F" ], - "execution_count": null + "execution_count": null, + "id": "cell-69c0ede1341a" }, { "cell_type": "code", @@ -58,7 +60,8 @@ "def my_conv2d(x, weight, bias=None, stride=1, padding=0):\n", " pass # extract patches, apply kernel, handle stride/padding" ], - "execution_count": null + "execution_count": null, + "id": "cell-7f0d7ebada60" }, { "cell_type": "code", @@ -71,7 +74,8 @@ "print('Output:', my_conv2d(x, w).shape)\n", "print('Match:', torch.allclose(my_conv2d(x, w), F.conv2d(x, w), atol=1e-4))" ], - "execution_count": null + "execution_count": null, + "id": "cell-e74cb3ef2145" }, { "cell_type": "code", @@ -82,7 +86,8 @@ "from torch_judge import check\n", "check('conv2d')" ], - "execution_count": null + "execution_count": null, + "id": "cell-d1448814fd6c" } ], "metadata": { @@ -97,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/23_cross_attention.ipynb b/templates/23_cross_attention.ipynb index 2467285..5a480fb 100644 --- a/templates/23_cross_attention.ipynb +++ b/templates/23_cross_attention.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb)\n", "\n", "# 🟠 Medium: Multi-Head Cross-Attention\n", "\n", @@ -23,7 +23,7 @@ "- Q comes from the decoder, K and V come from the encoder\n", "- No causal mask (all encoder positions visible)" ], - "outputs": [] + "id": "cell-34a817f5fad7" }, { "cell_type": "code", @@ -37,7 +37,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-435b5ae23b1b" }, { "cell_type": "code", @@ -48,7 +49,8 @@ "import torch.nn as nn\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-f994399f2c94" }, { "cell_type": "code", @@ -65,7 +67,8 @@ " def forward(self, x_q, x_kv):\n", " pass # Q from x_q, K/V from x_kv, no causal mask" ], - "execution_count": null + "execution_count": null, + "id": "cell-1f37d11c8a9d" }, { "cell_type": "code", @@ -78,7 +81,8 @@ "x_kv = torch.randn(2, 10, 64)\n", "print('Output:', attn(x_q, x_kv).shape)" ], - "execution_count": null + "execution_count": null, + "id": "cell-64eafa287bd5" }, { "cell_type": "code", @@ -89,7 +93,8 @@ "from torch_judge import check\n", "check('cross_attention')" ], - "execution_count": null + "execution_count": null, + "id": "cell-1f9c3bf410d6" } ], "metadata": { @@ -104,5 +109,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/24_rope.ipynb b/templates/24_rope.ipynb index d4ffd7c..f624e51 100644 --- a/templates/24_rope.ipynb +++ b/templates/24_rope.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb)\n", "\n", "# πŸ”΄ Hard: Rotary Position Embedding (RoPE)\n", "\n", @@ -24,7 +24,7 @@ "```\n", "This makes `dot(q_rot[i], k_rot[j])` depend only on `i - j` (relative position)." ], - "outputs": [] + "id": "cell-da2a3a074d20" }, { "cell_type": "code", @@ -38,7 +38,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-84b6ccc614aa" }, { "cell_type": "code", @@ -48,7 +49,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-5e4a7489f78a" }, { "cell_type": "code", @@ -63,7 +65,8 @@ " # 3. Apply rotation\n", " pass" ], - "execution_count": null + "execution_count": null, + "id": "cell-4a15485a11b8" }, { "cell_type": "code", @@ -77,7 +80,8 @@ "print('Shape preserved:', qr.shape == q.shape)\n", "print('Norm preserved:', torch.allclose(q.norm(dim=-1), qr.norm(dim=-1), atol=1e-4))" ], - "execution_count": null + "execution_count": null, + "id": "cell-bc2629a4ce01" }, { "cell_type": "code", @@ -88,7 +92,8 @@ "from torch_judge import check\n", "check('rope')" ], - "execution_count": null + "execution_count": null, + "id": "cell-c030cc8cf1ed" } ], "metadata": { @@ -103,5 +108,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/25_flash_attention.ipynb b/templates/25_flash_attention.ipynb index 249cd2a..304023f 100644 --- a/templates/25_flash_attention.ipynb +++ b/templates/25_flash_attention.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb)\n", "\n", "# πŸ”΄ Hard: Flash Attention (Tiled)\n", "\n", @@ -26,7 +26,7 @@ "\n", "Must give **identical** results to standard softmax attention." ], - "outputs": [] + "id": "cell-a3a2f1752b33" }, { "cell_type": "code", @@ -40,7 +40,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-41f68149b64d" }, { "cell_type": "code", @@ -50,7 +51,8 @@ "import torch\n", "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-2682ca1fa6cb" }, { "cell_type": "code", @@ -63,7 +65,8 @@ " # Process Q in blocks, iterate K/V blocks with online softmax\n", " pass" ], - "execution_count": null + "execution_count": null, + "id": "cell-3fbdae26e27b" }, { "cell_type": "code", @@ -78,7 +81,8 @@ "ref = torch.bmm(torch.softmax(scores, dim=-1), V)\n", "print('Match:', torch.allclose(out, ref, atol=1e-4))" ], - "execution_count": null + "execution_count": null, + "id": "cell-39451abb2fb9" }, { "cell_type": "code", @@ -89,7 +93,8 @@ "from torch_judge import check\n", "check('flash_attention')" ], - "execution_count": null + "execution_count": null, + "id": "cell-6b54c01cbe94" } ], "metadata": { @@ -104,5 +109,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/26_lora.ipynb b/templates/26_lora.ipynb index eaf7f69..a011bbd 100644 --- a/templates/26_lora.ipynb +++ b/templates/26_lora.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb)\n", "\n", "# 🟠 Medium: LoRA (Low-Rank Adaptation)\n", "\n", @@ -25,7 +25,7 @@ "- `self.lora_B`: `nn.Parameter(out_features, rank)` β€” **zero** init\n", "- Scaling: `alpha / rank`" ], - "outputs": [] + "id": "cell-4f3ac30b4879" }, { "cell_type": "code", @@ -39,7 +39,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-804235218069" }, { "cell_type": "code", @@ -49,7 +50,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-738ad84143a1" }, { "cell_type": "code", @@ -66,7 +68,8 @@ " def forward(self, x):\n", " pass # base + lora" ], - "execution_count": null + "execution_count": null, + "id": "cell-7d1bc24164ef" }, { "cell_type": "code", @@ -80,7 +83,8 @@ "print('Trainable:', sum(p.numel() for p in layer.parameters() if p.requires_grad))\n", "print('Total: ', sum(p.numel() for p in layer.parameters()))" ], - "execution_count": null + "execution_count": null, + "id": "cell-d02803ceb58b" }, { "cell_type": "code", @@ -91,7 +95,8 @@ "from torch_judge import check\n", "check('lora')" ], - "execution_count": null + "execution_count": null, + "id": "cell-5d161facb5df" } ], "metadata": { @@ -106,5 +111,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/27_vit_patch.ipynb b/templates/27_vit_patch.ipynb index 17acd59..9c15d46 100644 --- a/templates/27_vit_patch.ipynb +++ b/templates/27_vit_patch.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb)\n", "\n", "# 🟠 Medium: ViT Patch Embedding\n", "\n", @@ -24,7 +24,7 @@ "2. Project each patch: `nn.Linear(C*P*P, embed_dim)`\n", "3. `num_patches = (img_size // patch_size) ** 2`" ], - "outputs": [] + "id": "cell-e3dac63f0643" }, { "cell_type": "code", @@ -38,7 +38,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-740f83775300" }, { "cell_type": "code", @@ -48,7 +49,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-d41a3c3a21ea" }, { "cell_type": "code", @@ -65,7 +67,8 @@ " def forward(self, x):\n", " pass # reshape to patches, project" ], - "execution_count": null + "execution_count": null, + "id": "cell-dc7725416d25" }, { "cell_type": "code", @@ -78,7 +81,8 @@ "print('Output:', pe(x).shape)\n", "print('Patches:', pe.num_patches)" ], - "execution_count": null + "execution_count": null, + "id": "cell-2413ef692804" }, { "cell_type": "code", @@ -89,7 +93,8 @@ "from torch_judge import check\n", "check('vit_patch')" ], - "execution_count": null + "execution_count": null, + "id": "cell-19a6048ad032" } ], "metadata": { @@ -104,5 +109,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/28_moe.ipynb b/templates/28_moe.ipynb index f50fe33..1b8ef16 100644 --- a/templates/28_moe.ipynb +++ b/templates/28_moe.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb)\n", "\n", "# πŸ”΄ Hard: Mixture of Experts (MoE)\n", "\n", @@ -23,7 +23,7 @@ "- `self.experts`: `nn.ModuleList` of MLPs `(Linearβ†’ReLUβ†’Linear)`\n", "- For each token: select top-k experts, compute weighted sum of their outputs" ], - "outputs": [] + "id": "cell-9c304a8c3956" }, { "cell_type": "code", @@ -37,7 +37,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-20ad10e1e33a" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-34b93b3abcdd" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " def forward(self, x):\n", " pass # route tokens to top-k experts" ], - "execution_count": null + "execution_count": null, + "id": "cell-f436bda3de57" }, { "cell_type": "code", @@ -77,7 +80,8 @@ "print('Output:', moe(x).shape)\n", "print('Params:', sum(p.numel() for p in moe.parameters()))" ], - "execution_count": null + "execution_count": null, + "id": "cell-561861c5a056" }, { "cell_type": "code", @@ -88,7 +92,8 @@ "from torch_judge import check\n", "check('moe')" ], - "execution_count": null + "execution_count": null, + "id": "cell-51890761b776" } ], "metadata": { @@ -103,5 +108,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/29_adam.ipynb b/templates/29_adam.ipynb index a3c1d63..c6316df 100644 --- a/templates/29_adam.ipynb +++ b/templates/29_adam.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb)\n", "\n", "# 🟠 Medium: Adam Optimizer\n", "\n", @@ -27,7 +27,7 @@ "p -= lr * mΜ‚ / (√vΜ‚ + Ξ΅)\n", "```" ], - "outputs": [] + "id": "cell-5cb31982aaf8" }, { "cell_type": "code", @@ -41,7 +41,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-1cb4c371e8f7" }, { "cell_type": "code", @@ -50,7 +51,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-3a93c9989281" }, { "cell_type": "code", @@ -69,7 +71,8 @@ " def zero_grad(self):\n", " pass # zero all gradients" ], - "execution_count": null + "execution_count": null, + "id": "cell-a236feaf1380" }, { "cell_type": "code", @@ -87,7 +90,8 @@ " opt.zero_grad()\n", " print(f'Step {i}: loss={loss.item():.4f}')" ], - "execution_count": null + "execution_count": null, + "id": "cell-b86f6e0429ec" }, { "cell_type": "code", @@ -98,7 +102,8 @@ "from torch_judge import check\n", "check('adam')" ], - "execution_count": null + "execution_count": null, + "id": "cell-64d025317dea" } ], "metadata": { @@ -113,5 +118,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/30_cosine_lr.ipynb b/templates/30_cosine_lr.ipynb index 108f579..d6ae82b 100644 --- a/templates/30_cosine_lr.ipynb +++ b/templates/30_cosine_lr.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb)\n", "\n", "# 🟠 Medium: Cosine LR Scheduler with Warmup\n", "\n", @@ -22,7 +22,7 @@ "```\n", "where `progress = (step - warmup) / (total - warmup)`" ], - "outputs": [] + "id": "cell-2f7ab12b09f5" }, { "cell_type": "code", @@ -36,7 +36,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-315c66c2530d" }, { "cell_type": "code", @@ -45,7 +46,8 @@ "source": [ "import math" ], - "execution_count": null + "execution_count": null, + "id": "cell-b65689467914" }, { "cell_type": "code", @@ -57,7 +59,8 @@ "def cosine_lr_schedule(step, total_steps, warmup_steps, max_lr, min_lr=0.0):\n", " pass # warmup then cosine decay" ], - "execution_count": null + "execution_count": null, + "id": "cell-11a186b646b6" }, { "cell_type": "code", @@ -71,7 +74,8 @@ "print(f'Mid: {lrs[55]:.6f}')\n", "print(f'End: {lrs[100]:.6f}')" ], - "execution_count": null + "execution_count": null, + "id": "cell-ad10b7e52e80" }, { "cell_type": "code", @@ -82,7 +86,8 @@ "from torch_judge import check\n", "check('cosine_lr')" ], - "execution_count": null + "execution_count": null, + "id": "cell-5c0c6e53cf62" } ], "metadata": { @@ -97,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/31_gradient_accumulation.ipynb b/templates/31_gradient_accumulation.ipynb index 78e205e..dfc4156 100644 --- a/templates/31_gradient_accumulation.ipynb +++ b/templates/31_gradient_accumulation.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb)\n", "\n", "# 🟒 Easy: Gradient Accumulation\n", "\n", @@ -25,7 +25,7 @@ "\n", "The key insight: dividing each loss by `n` before backward makes accumulated gradients equal to a single large-batch gradient." ], - "outputs": [] + "id": "cell-729ba7c32f56" }, { "cell_type": "code", @@ -39,7 +39,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e5d4e04e13dd" }, { "cell_type": "code", @@ -49,7 +50,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-1782016823d3" }, { "cell_type": "code", @@ -61,7 +63,8 @@ "def accumulated_step(model, optimizer, loss_fn, micro_batches):\n", " pass # zero_grad, loop (forward, scale loss, backward), step" ], - "execution_count": null + "execution_count": null, + "id": "cell-252c5a1ae8bd" }, { "cell_type": "code", @@ -75,7 +78,8 @@ " [(torch.randn(2, 4), torch.randn(2, 2)) for _ in range(4)])\n", "print('Loss:', loss)" ], - "execution_count": null + "execution_count": null, + "id": "cell-c342b0f437a0" }, { "cell_type": "code", @@ -86,7 +90,8 @@ "from torch_judge import check\n", "check('gradient_accumulation')" ], - "execution_count": null + "execution_count": null, + "id": "cell-fb30a026811c" } ], "metadata": { @@ -101,5 +106,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/32_topk_sampling.ipynb b/templates/32_topk_sampling.ipynb index bbb788e..04681c4 100644 --- a/templates/32_topk_sampling.ipynb +++ b/templates/32_topk_sampling.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb)\n", "\n", "# 🟠 Medium: Top-k / Top-p (Nucleus) Sampling\n", "\n", @@ -23,7 +23,7 @@ "3. Top-p: sort by prob, mask tokens where cumulative prob exceeds p\n", "4. Sample from filtered distribution" ], - "outputs": [] + "id": "cell-232eac9049c8" }, { "cell_type": "code", @@ -37,7 +37,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-2bbecf087b2d" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-a29f7d64a079" }, { "cell_type": "code", @@ -58,7 +60,8 @@ "def sample_top_k_top_p(logits, top_k=0, top_p=1.0, temperature=1.0):\n", " pass # temperature, top-k filter, top-p filter, sample" ], - "execution_count": null + "execution_count": null, + "id": "cell-0b28e6279efe" }, { "cell_type": "code", @@ -71,7 +74,8 @@ "print('top_p=0.5:', sample_top_k_top_p(logits.clone(), top_p=0.5))\n", "print('temp=0.01:', sample_top_k_top_p(logits.clone(), temperature=0.01))" ], - "execution_count": null + "execution_count": null, + "id": "cell-fe6e6bdda90f" }, { "cell_type": "code", @@ -82,7 +86,8 @@ "from torch_judge import check\n", "check('topk_sampling')" ], - "execution_count": null + "execution_count": null, + "id": "cell-d06147650a42" } ], "metadata": { @@ -97,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/33_beam_search.ipynb b/templates/33_beam_search.ipynb index 7735f71..f708333 100644 --- a/templates/33_beam_search.ipynb +++ b/templates/33_beam_search.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb)\n", "\n", "# 🟠 Medium: Beam Search Decoding\n", "\n", @@ -23,7 +23,7 @@ "3. Keep top `beam_width` beams by total log-probability\n", "4. Stop when best beam ends with `eos_token` or `max_len` reached" ], - "outputs": [] + "id": "cell-a1ce1a0ad1e8" }, { "cell_type": "code", @@ -37,7 +37,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-09943e13a512" }, { "cell_type": "code", @@ -46,7 +47,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-76094db43ca6" }, { "cell_type": "code", @@ -58,7 +60,8 @@ "def beam_search(log_prob_fn, start_token, max_len, beam_width, eos_token):\n", " pass # maintain beams, expand, prune, return best" ], - "execution_count": null + "execution_count": null, + "id": "cell-fa2f812569bc" }, { "cell_type": "code", @@ -73,7 +76,8 @@ "seq = beam_search(simple_fn, start_token=0, max_len=5, beam_width=2, eos_token=4)\n", "print('Sequence:', seq)" ], - "execution_count": null + "execution_count": null, + "id": "cell-c95ad95c3613" }, { "cell_type": "code", @@ -84,7 +88,8 @@ "from torch_judge import check\n", "check('beam_search')" ], - "execution_count": null + "execution_count": null, + "id": "cell-2d48c63dbaf6" } ], "metadata": { @@ -99,5 +104,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/34_speculative_decoding.ipynb b/templates/34_speculative_decoding.ipynb index 5b8e692..d17e494 100644 --- a/templates/34_speculative_decoding.ipynb +++ b/templates/34_speculative_decoding.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb)\n", "\n", "# πŸ”΄ Hard: Speculative Decoding\n", "\n", @@ -25,7 +25,7 @@ "2. Accept with probability `min(1, ratio)`\n", "3. If rejected: sample from `normalize(max(0, target - draft))`, append, and stop" ], - "outputs": [] + "id": "cell-36f582c1e352" }, { "cell_type": "code", @@ -39,7 +39,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-24210e4e8b04" }, { "cell_type": "code", @@ -48,7 +49,8 @@ "source": [ "import torch" ], - "execution_count": null + "execution_count": null, + "id": "cell-619e8bf022fe" }, { "cell_type": "code", @@ -60,7 +62,8 @@ "def speculative_decode(target_probs, draft_probs, draft_tokens):\n", " pass # accept/reject loop" ], - "execution_count": null + "execution_count": null, + "id": "cell-1af7a28c2e7a" }, { "cell_type": "code", @@ -76,7 +79,8 @@ "draft = torch.softmax(torch.randn(4, 10), dim=-1)\n", "print('Random draft:', speculative_decode(target, draft, tokens))" ], - "execution_count": null + "execution_count": null, + "id": "cell-4881fb28d742" }, { "cell_type": "code", @@ -87,7 +91,8 @@ "from torch_judge import check\n", "check('speculative_decoding')" ], - "execution_count": null + "execution_count": null, + "id": "cell-762b854c38a3" } ], "metadata": { @@ -102,5 +107,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/35_bpe.ipynb b/templates/35_bpe.ipynb index 9adfca0..8233175 100644 --- a/templates/35_bpe.ipynb +++ b/templates/35_bpe.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb)\n", "\n", "# πŸ”΄ Hard: Byte-Pair Encoding (BPE)\n", "\n", @@ -24,7 +24,7 @@ "3. Merge the most frequent pair into a single token\n", "4. Repeat for `num_merges` iterations" ], - "outputs": [] + "id": "cell-7c7bccea580e" }, { "cell_type": "code", @@ -38,7 +38,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-b0071d4dba45" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "source": [ "# No imports needed" ], - "execution_count": null + "execution_count": null, + "id": "cell-403c83cef060" }, { "cell_type": "code", @@ -66,7 +68,8 @@ " def encode(self, text):\n", " pass # apply learned merges to split text" ], - "execution_count": null + "execution_count": null, + "id": "cell-6c505da33a0d" }, { "cell_type": "code", @@ -79,7 +82,8 @@ "print('Merges:', bpe.merges[:5])\n", "print('Encode:', bpe.encode('low lower'))" ], - "execution_count": null + "execution_count": null, + "id": "cell-df894fa7e08e" }, { "cell_type": "code", @@ -90,7 +94,8 @@ "from torch_judge import check\n", "check('bpe')" ], - "execution_count": null + "execution_count": null, + "id": "cell-c1eabc45d192" } ], "metadata": { @@ -105,5 +110,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/36_int8_quantization.ipynb b/templates/36_int8_quantization.ipynb index 0c3516a..c35baed 100644 --- a/templates/36_int8_quantization.ipynb +++ b/templates/36_int8_quantization.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb)\n", "\n", "# πŸ”΄ Hard: INT8 Quantized Linear\n", "\n", @@ -23,7 +23,7 @@ "3. Store as `register_buffer` (not trainable)\n", "4. Forward: dequantize (`int8.float() * scale`) then matmul" ], - "outputs": [] + "id": "cell-7db8d2e6ad86" }, { "cell_type": "code", @@ -37,7 +37,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-bad6a1fafbe1" }, { "cell_type": "code", @@ -47,7 +48,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-8535b80f5f3f" }, { "cell_type": "code", @@ -64,7 +66,8 @@ " def forward(self, x):\n", " pass # dequantize and matmul" ], - "execution_count": null + "execution_count": null, + "id": "cell-40724188a196" }, { "cell_type": "code", @@ -79,7 +82,8 @@ "print('dtype:', q.weight_int8.dtype)\n", "print('Max quant error:', (w - q.weight_int8.float() * q.scale).abs().max().item())" ], - "execution_count": null + "execution_count": null, + "id": "cell-110ae54125bd" }, { "cell_type": "code", @@ -90,7 +94,8 @@ "from torch_judge import check\n", "check('int8_quantization')" ], - "execution_count": null + "execution_count": null, + "id": "cell-f0d8000cd49c" } ], "metadata": { @@ -105,5 +110,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/37_dpo_loss.ipynb b/templates/37_dpo_loss.ipynb index 38ea5b5..87d734d 100644 --- a/templates/37_dpo_loss.ipynb +++ b/templates/37_dpo_loss.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb)\n", "\n", "# πŸ”΄ Hard: DPO Loss\n", "\n", @@ -19,7 +19,8 @@ " # All inputs: (B,) log-probabilities\n", " # Returns: scalar loss\n", "```" - ] + ], + "id": "cell-5f1148d811f0" }, { "cell_type": "code", @@ -33,7 +34,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-50824f7a9545" }, { "cell_type": "code", @@ -43,7 +45,8 @@ "source": [ "import torch\n", "import torch.nn.functional as F" - ] + ], + "id": "cell-9e2999e4f730" }, { "cell_type": "code", @@ -56,7 +59,8 @@ "def dpo_loss(policy_chosen_logps, policy_rejected_logps,\n", " ref_chosen_logps, ref_rejected_logps, beta=0.1):\n", " pass # -log(sigmoid(beta * (chosen_reward - rejected_reward)))" - ] + ], + "id": "cell-6bb23f54bf17" }, { "cell_type": "code", @@ -70,7 +74,8 @@ "ref_c = torch.tensor([-1.0, -1.0])\n", "ref_r = torch.tensor([-1.0, -1.0])\n", "print('Loss:', dpo_loss(chosen, rejected, ref_c, ref_r, beta=0.1).item())" - ] + ], + "id": "cell-a4667c46e10b" }, { "cell_type": "code", @@ -81,7 +86,8 @@ "# βœ… SUBMIT\n", "from torch_judge import check\n", "check('dpo_loss')" - ] + ], + "id": "cell-1c162f8718df" } ], "metadata": { @@ -96,5 +102,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/templates/38_grpo_loss.ipynb b/templates/38_grpo_loss.ipynb index 546f212..5e64339 100644 --- a/templates/38_grpo_loss.ipynb +++ b/templates/38_grpo_loss.ipynb @@ -5,7 +5,7 @@ "id": "968cc37c", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb)\n", "\n", "# πŸ”΄ Hard: GRPO Loss\n", "\n", @@ -49,7 +49,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-763451936f66" }, { "cell_type": "code", diff --git a/templates/39_ppo_loss.ipynb b/templates/39_ppo_loss.ipynb index 7958571..21d9907 100644 --- a/templates/39_ppo_loss.ipynb +++ b/templates/39_ppo_loss.ipynb @@ -5,7 +5,7 @@ "id": "6674fa96", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb)\n", "\n", "# πŸ”΄ Hard: PPO Clipped Loss\n", "\n", @@ -54,7 +54,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-e455b0c64ef7" }, { "cell_type": "code", @@ -65,7 +66,8 @@ "import torch\n", "import torch.nn.functional as F\n", "from torch import Tensor\n" - ] + ], + "id": "cell-9a423ce072a5" }, { "cell_type": "code", @@ -78,7 +80,8 @@ "def ppo_loss(new_logps: Tensor, old_logps: Tensor, advantages: Tensor,\n", " clip_ratio: float = 0.2) -> Tensor:\n", " pass # -mean(min(r * adv, clamp(r, 1-clip, 1+clip) * adv)) with gradients only through new_logps\n" - ] + ], + "id": "cell-78651a5bf0cc" }, { "cell_type": "code", @@ -91,7 +94,8 @@ "old_logps = torch.tensor([0.0, -0.1, -0.5, -0.5])\n", "advantages = torch.tensor([1.0, -1.0, 0.5, -0.5])\n", "print('Loss:', ppo_loss(new_logps, old_logps, advantages, clip_ratio=0.2))\n" - ] + ], + "id": "cell-06a29e93ae34" }, { "cell_type": "code", @@ -102,7 +106,8 @@ "# βœ… SUBMIT\n", "from torch_judge import check\n", "check('ppo_loss')\n" - ] + ], + "id": "cell-837482c2b65a" } ], "metadata": { diff --git a/templates/40_linear_regression.ipynb b/templates/40_linear_regression.ipynb index 1d099e7..24da9dd 100644 --- a/templates/40_linear_regression.ipynb +++ b/templates/40_linear_regression.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb)\n", + "[Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb)\n", "\n", "# 🟑 Medium: Linear Regression\n", "\n", @@ -52,7 +52,8 @@ "- `closed_form` must not use iterative optimization\n", "- `gradient_descent` must manually compute gradients (no `autograd`)\n", "- `nn_linear` should use `torch.nn.Linear` and `loss.backward()`" - ] + ], + "id": "cell-d1745b1876b4" }, { "cell_type": "code", @@ -66,7 +67,8 @@ " pass\n" ], "outputs": [], - "execution_count": null + "execution_count": null, + "id": "cell-3be7852a281e" }, { "cell_type": "code", @@ -76,7 +78,8 @@ "import torch\n", "import torch.nn as nn" ], - "execution_count": null + "execution_count": null, + "id": "cell-6a500c47f5a9" }, { "cell_type": "code", @@ -100,7 +103,8 @@ " \"\"\"Train nn.Linear with autograd\"\"\"\n", " pass # Return (w, b)" ], - "execution_count": null + "execution_count": null, + "id": "cell-b1ff9cc6ea6b" }, { "cell_type": "code", @@ -126,7 +130,8 @@ "\n", "print(f\"\\nTrue: w={true_w}, b=3.0\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-f756deb6e805" }, { "cell_type": "code", @@ -137,7 +142,8 @@ "from torch_judge import check\n", "check(\"linear_regression\")" ], - "execution_count": null + "execution_count": null, + "id": "cell-2a5bddbe8d48" } ], "metadata": { @@ -152,5 +158,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 }