From 4bc07656890f96b43a8de2cdacf84c271117b966 Mon Sep 17 00:00:00 2001 From: Rkeramati Date: Sun, 28 Sep 2025 19:49:49 -0400 Subject: [PATCH 1/3] skipping pre commit check --- .github/classroom/autograding.json | 54 ++++++++ .github/workflows/classroom.yaml | 16 +++ .pre-commit-config.yaml | 45 +++---- Module2.md | 172 ++++++++++++++++++++++++++ README.md | 57 ++++++++- installation.md | 98 +++++++++++++++ minitorch/autodiff.py | 20 ++- minitorch/module.py | 24 ++-- minitorch/operators.py | 59 ++++----- minitorch/scalar.py | 31 ++--- minitorch/tensor.py | 70 +++++------ minitorch/tensor_data.py | 33 +++-- minitorch/tensor_functions.py | 22 ++-- minitorch/tensor_ops.py | 42 +++---- module2-modernization.md | 191 ++++++++++++++++++++++++++++ pyproject.toml | 192 +++++++++++++++++++++++++++++ requirements.extra.txt | 10 -- requirements.txt | 10 -- setup.cfg | 61 --------- setup.py | 3 - sync_previous_module.py | 90 +++++++++----- testing.md | 146 ++++++++++++++++++++++ tests/tensor_strategies.py | 1 - 23 files changed, 1126 insertions(+), 321 deletions(-) create mode 100644 .github/classroom/autograding.json create mode 100644 .github/workflows/classroom.yaml create mode 100644 Module2.md create mode 100644 installation.md create mode 100644 module2-modernization.md create mode 100644 pyproject.toml delete mode 100644 requirements.extra.txt delete mode 100644 requirements.txt delete mode 100644 setup.cfg delete mode 100644 setup.py create mode 100644 testing.md diff --git a/.github/classroom/autograding.json b/.github/classroom/autograding.json new file mode 100644 index 00000000..9c124daa --- /dev/null +++ b/.github/classroom/autograding.json @@ -0,0 +1,54 @@ +{ + "tests": [ + { + "name": "Style", + "setup": "python -m venv .venv; . .venv/bin/activate; pip3 install -qe .[dev]; pip3 install -q pre-commit", + "run": ". 
.venv/bin/activate && pre-commit run --all", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.1", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_1", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.2", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_2", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.3", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_3", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.4", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_4", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + } + ] +} \ No newline at end of file diff --git a/.github/workflows/classroom.yaml b/.github/workflows/classroom.yaml new file mode 100644 index 00000000..2853c181 --- /dev/null +++ b/.github/workflows/classroom.yaml @@ -0,0 +1,16 @@ +name: GitHub Classroom Workflow + +on: [push] + +permissions: + checks: write + actions: read + contents: read + +jobs: + build: + name: Autograding + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: education/autograding@v1 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05e8b56d..ae349ac8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v5.0.0 hooks: - id: check-added-large-files - id: check-case-conflict @@ -25,38 +25,21 @@ repos: - id: check-toml - id: debug-statements - id: mixed-line-ending - - id: requirements-txt-fixer - id: trailing-whitespace -- repo: https://github.com/timothycrosley/isort - rev: 5.10.1 +- repo: 
https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.9 hooks: - - id: isort + # Run the linter. + - id: ruff-check + args: [ --fix ] + # Run the formatter. + - id: ruff-format -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.971 +- repo: https://github.com/RobertCraigie/pyright-python + rev: v1.1.404 hooks: - - id: mypy - exclude: ^(docs/)|(project/)|(assignments/)|(project/interface/) - - -# Black, the code formatter, natively supports pre-commit -- repo: https://github.com/psf/black - rev: 22.6.0 - hooks: - - id: black - -# Flake8 also supports pre-commit natively (same author) -- repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 - hooks: - - id: flake8 - additional_dependencies: - - pep8-naming - exclude: ^(docs/)|(assignments/)|(project/interface/) - -# Doc linters -- repo: https://github.com/terrencepreilly/darglint - rev: v1.8.1 - hooks: - - id: darglint + - id: pyright + additional_dependencies: + - pytest + - hypothesis \ No newline at end of file diff --git a/Module2.md b/Module2.md new file mode 100644 index 00000000..acc6889e --- /dev/null +++ b/Module2.md @@ -0,0 +1,172 @@ +# Module-2 Assignment Summary + +## Overview +Module-2 introduces **Tensors** - multidimensional arrays that extend and optimize the scalar autodifferentiation system from Module-1. While Module-1's scalar system is correct, it's inefficient due to Python overhead from individual scalar objects and operations. Tensors solve this by grouping operations together and enabling faster implementations. 
+ +## Learning Objectives +- Build efficient tensor data structures with proper indexing and memory layout +- Implement tensor operations (map, zip, reduce) for element-wise and reduction operations +- Extend autodifferentiation to work with tensors and broadcasting +- Create tensor-based neural networks that outperform scalar implementations +- Understand memory optimization through strides, views, and broadcasting + +## Problem Statement +The scalar system from Module-1 has performance issues: +- Every scalar requires building an object +- Each operation stores a complete computation graph +- Training requires repeated operations with Python overhead +- Models like linear regression need inefficient for loops + +**Solution**: Tensors group repeated operations to save Python overhead and delegate to faster implementations. + +## Core Architecture + +### Key Files +- **tensor.py** - User-facing Tensor interface (similar to scalar.py) +- **tensor_data.py** - Core indexing, strides, storage management +- **tensor_ops.py** - Higher-order tensor operations (map, zip, reduce) +- **tensor_functions.py** - Autodifferentiation-ready tensor functions + +### Supporting Files +- **operators.py** - Mathematical operators (inherited from Module-1) +- **autodiff.py** - Autodifferentiation framework (inherited from Module-1) + +## Detailed Task Breakdown + +### Task 2.1: Tensor Data - Indexing +**File**: `minitorch/tensor_data.py` +**Objective**: Implement core tensor backend (`TensorData`) for indexing and storage + +**Functions to Implement**: +1. **`index_to_position(index: Index, strides: Strides) -> int`** + - Converts multidimensional tensor index to single storage position + - Uses strides to calculate memory position + +2. **`to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None`** + - Converts ordinal position (0...size-1) to multidimensional index + - Ensures enumeration produces every index exactly once + - May not be inverse of `index_to_position` + +3. 
**`TensorData.permute(*order: int) -> TensorData`** + - Permutes tensor dimensions + - Returns new TensorData with same storage, new dimension order + +**Key Concepts**: +- **Storage**: Flat 1D array containing tensor data +- **Shape**: Dimensions of tensor (e.g., (3, 4, 5)) +- **Strides**: Memory navigation pattern (how many positions to skip per dimension) + +### Task 2.2: Tensor Broadcasting +**File**: `minitorch/tensor_data.py` +**Objective**: Implement broadcasting for operations between differently-shaped tensors + +**Functions to Implement**: +1. **`shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape`** + - Creates union shape from two shapes following broadcasting rules + - Raises `IndexingError` if shapes cannot broadcast + +2. **`broadcast_index(big_index, big_shape, shape, out_index) -> None`** + - Converts index from larger tensor to smaller tensor + - Handles dimension mapping (may map to 0 or remove dimensions) + +**Broadcasting Rules**: +- Tensors aligned from rightmost dimension +- Dimensions of size 1 can broadcast to any size +- Missing dimensions treated as size 1 +- Example: (3, 1) + (3, 4) → (3, 4) + +### Task 2.3: Tensor Operations +**File**: `minitorch/tensor_ops.py`, `minitorch/tensor_functions.py`, `minitorch/tensor.py` +**Objective**: Implement high-level tensor operations and user interface + +**Core Operations in `tensor_ops.py`**: +1. **`tensor_map(fn) -> Callable`** + - Applies function element-wise to tensor + - Handles broadcasting between different shapes + +2. **`tensor_zip(fn) -> Callable`** + - Applies binary function element-wise to two tensors + - Supports broadcasting + +3. 
**`tensor_reduce(fn) -> Callable`** + - Reduces tensor along specified dimension + - Output shape same as input except reduced dimension becomes size 1 + +**Forward Functions in `tensor_functions.py`**: +- **Unary**: Sigmoid, ReLU, Log, Exp +- **Binary**: Mul, LT, EQ, IsClose +- **Reductions**: Sum (with dim argument) +- **Shape Operations**: Permute + +**User Interface in `tensor.py`**: +- **Properties**: size, dims +- **Operators**: add, sub, mul, lt, eq, gt, neg, radd, rmul +- **Functions**: all, is_close, sigmoid, relu, log, exp +- **Reductions**: sum, mean (with optional dim) +- **Shape Operations**: permute, view +- **Utilities**: zero_grad_ + +### Task 2.4: Gradients and Autograd +**File**: `minitorch/tensor_functions.py` +**Objective**: Implement backward functions for tensor autodifferentiation + +**Key Challenges**: +- Gradient computation through broadcasting operations +- Proper gradient aggregation when tensors are broadcast +- Maintaining computation graph for complex tensor operations + +**Similar to Module-1**: Tensors are `Variable` objects supporting autodifferentiation, but now handle multidimensional arrays efficiently. + +### Task 2.5: Training +**File**: `project/run_tensor.py` +**Objective**: Implement tensor-based neural network training + +**Requirements**: +- Three-layer neural network: 2 → Hidden (ReLU) → Hidden (ReLU) → Output (Sigmoid) +- Same functionality as `project/run_scalar.py` but using tensor operations +- Train on all datasets and record results +- Measure and report time per epoch + +## Key Technical Concepts + +### Memory Layout and Strides +- **Contiguous**: Data stored in row-major (C-style) order +- **Strides**: Define memory access pattern for each dimension +- **Views**: Different tensor shapes sharing same underlying storage + +### Tensor Operations Hierarchy +1. **Low-level**: `tensor_ops.py` (map, zip, reduce) +2. **Mid-level**: `tensor_functions.py` (mathematical functions) +3. 
**High-level**: `tensor.py` (user-friendly interface) + +### Performance Benefits +- **Reduced Python Overhead**: Group operations instead of individual scalars +- **Vectorized Operations**: Delegate to optimized implementations +- **Memory Efficiency**: Shared storage through views and broadcasting +- **Graph Optimization**: Fewer nodes in computation graph + +## Testing Structure +- **task2_1**: `test_tensor_data.py` - Indexing and layout tests +- **task2_2**: `test_tensor_data.py` - Broadcasting tests +- **task2_3**: `test_tensor.py` - Function and operation tests +- **task2_4**: `test_tensor.py` - Autodifferentiation tests + +## Expected Performance Improvement +Moving from scalar to tensor implementation should provide: +- Significant speedup in training time per epoch +- Reduced memory usage through efficient storage +- Better scalability for larger models and datasets + +## Integration with Previous Modules +**Synced Files from Module-1**: +- `minitorch/operators.py` - Mathematical operators +- `minitorch/module.py` - Neural network module framework +- `minitorch/autodiff.py` - Core autodifferentiation system +- `minitorch/scalar.py` - Scalar implementation +- `project/run_manual.py` - Manual gradient checking +- `project/run_scalar.py` - Scalar-based training + +## Debugging Tools +- **Expression Visualization**: `streamlit run project/app.py -- 2` +- **Graph Builder**: View computation graphs for tensor operations +- **Tensor Debugging**: Interactive tools for understanding tensor operations \ No newline at end of file diff --git a/README.md b/README.md index 9304eaab..86d30044 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,66 @@ +**Tensors** - Extending Autodifferentiation to Multidimensional Arrays * Docs: https://minitorch.github.io/ - * Overview: https://minitorch.github.io/module2/module2/ -This assignment requires the following files from the previous assignments. 
You can get these by running +## Overview + +Module 2 introduces **Tensors** - multidimensional arrays that extend the scalar autodifferentiation system from Module 1. While the scalar system is correct, it's inefficient due to Python overhead. Tensors solve this by grouping operations together and enabling faster implementations. + +## Installation + +See [installation.md](installation.md) for detailed setup instructions. + +## Quick Start + +```bash +# Install dependencies +pip install -e ".[dev,extra]" + +# Sync files from Module 1 +python sync_previous_module.py ../Module-1 . + +# Verify installation +python -c "import minitorch; print('Success!')" + +# Run tests +pytest -m task2_1 # Tensor data and indexing +pytest -m task2_2 # Tensor broadcasting +pytest -m task2_3 # Tensor operations +pytest -m task2_4 # Tensor autodifferentiation + +# Train tensor-based model +python project/run_tensor.py +``` + +## Tasks + +- **Task 2.1**: Implement tensor data structures with indexing and strides +- **Task 2.2**: Implement tensor broadcasting for operations between different shapes +- **Task 2.3**: Implement tensor operations (map, zip, reduce) and mathematical functions +- **Task 2.4**: Extend autodifferentiation to work with tensors and broadcasting +- **Task 2.5**: Create tensor-based neural network training + +## Testing + +See [testing.md](testing.md) for detailed testing instructions. + +## Files + +This assignment requires the following files from Module 1. You can get these by running: ```bash -python sync_previous_module.py previous-module-dir current-module-dir +python sync_previous_module.py ../Module-1 . 
``` The files that will be synced are: - minitorch/operators.py minitorch/module.py minitorch/autodiff.py minitorch/scalar.py minitorch/module.py project/run_manual.py project/run_scalar.py \ No newline at end of file +- `minitorch/operators.py` +- `minitorch/module.py` +- `minitorch/autodiff.py` +- `minitorch/scalar.py` +- `project/run_manual.py` +- `project/run_scalar.py` \ No newline at end of file diff --git a/installation.md b/installation.md new file mode 100644 index 00000000..2ae0765e --- /dev/null +++ b/installation.md @@ -0,0 +1,98 @@ +--- +hide: + - navigation +--- + +# MiniTorch Module 2 Installation + +MiniTorch requires Python 3.8 or higher. To check your version of Python, run: + +```bash +>>> python --version +``` + +We recommend creating a global MiniTorch workspace directory that you will use +for all modules: + +```bash +>>> mkdir workspace; cd workspace +``` + +## Environment Setup + +We highly recommend setting up a *virtual environment*. The virtual environment lets you install packages that are only used for your assignments and do not impact the rest of the system. + +**Option 1: Anaconda (Recommended)** +```bash +>>> conda create --name minitorch python # Run only once +>>> conda activate minitorch +>>> conda install llvmlite # For optimization +``` + +**Option 2: Venv** +```bash +>>> python -m venv venv # Run only once +>>> source venv/bin/activate +``` + +The first line should be run only once, whereas the second needs to be run whenever you open a new terminal to get started for the class. You can tell if it works by checking if your terminal starts with `(minitorch)` or `(venv)`. + +## Getting the Code + +Each assignment is distributed through a Git repo. Once you accept the assignment from GitHub Classroom, a personal repository under Cornell-Tech-ML will be created for you. You can then clone this repository to start working on your assignment. 
+ +```bash +>>> git clone {{ASSIGNMENT}} +>>> cd {{ASSIGNMENT}} +``` + +## Syncing Previous Module Files + +Module 2 requires files from Module 0 and Module 1. Sync them using: + +```bash +>>> python sync_previous_module.py <previous-module-dir> <current-module-dir> +``` + +Example: +```bash +>>> python sync_previous_module.py ../Module-1 . +``` + +Replace `<previous-module-dir>` with the path to your Module 1 directory and `<current-module-dir>` with `.` for the current directory. + +This will copy the following required files: +- `minitorch/operators.py` +- `minitorch/module.py` +- `minitorch/autodiff.py` +- `minitorch/scalar.py` +- `tests/test_module.py` +- `tests/test_operators.py` +- `tests/test_autodiff.py` +- `tests/test_scalar.py` +- `project/run_manual.py` +- `project/run_scalar.py` + +## Installation + +Install all packages in your virtual environment: + +```bash +>>> python -m pip install -e ".[dev,extra]" +``` + +## Verification + +Make sure everything is installed by running: + +```bash +>>> python -c "import minitorch; print('Success!')" +``` + +Verify that the tensor functionality is available: + +```bash +>>> python -c "from minitorch import tensor; print('Module 2 ready!')" +``` + +You're ready to start Module 2! \ No newline at end of file diff --git a/minitorch/autodiff.py b/minitorch/autodiff.py index f7fa3b36..7d526be8 100644 --- a/minitorch/autodiff.py +++ b/minitorch/autodiff.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Iterable, List, Tuple +from typing import Any, Iterable, Tuple from typing_extensions import Protocol @@ -8,8 +8,7 @@ def central_difference(f: Any, *vals: Any, arg: int = 0, epsilon: float = 1e-6) -> Any: - r""" - Computes an approximation to the derivative of `f` with respect to one arg. + r"""Computes an approximation to the derivative of `f` with respect to one arg. See :doc:`derivative` or https://en.wikipedia.org/wiki/Finite_difference for more details. 
@@ -21,6 +20,7 @@ def central_difference(f: Any, *vals: Any, arg: int = 0, epsilon: float = 1e-6) Returns: An approximation of $f'_i(x_0, \ldots, x_{n-1})$ + """ raise NotImplementedError("Need to include this file from past assignment.") @@ -51,21 +51,20 @@ def chain_rule(self, d_output: Any) -> Iterable[Tuple["Variable", Any]]: def topological_sort(variable: Variable) -> Iterable[Variable]: - """ - Computes the topological order of the computation graph. + """Computes the topological order of the computation graph. Args: variable: The right-most variable Returns: Non-constant Variables in topological order starting from the right. + """ raise NotImplementedError("Need to include this file from past assignment.") def backpropagate(variable: Variable, deriv: Any) -> None: - """ - Runs backpropagation on the computation graph in order to + """Runs backpropagation on the computation graph in order to compute derivatives for the leave nodes. Args: @@ -73,21 +72,20 @@ def backpropagate(variable: Variable, deriv: Any) -> None: deriv : Its derivative that we want to propagate backward to the leaves. No return. Should write to its results to the derivative values of each leaf through `accumulate_derivative`. + """ raise NotImplementedError("Need to include this file from past assignment.") @dataclass class Context: - """ - Context class is used by `Function` to store information during the forward pass. - """ + """Context class is used by `Function` to store information during the forward pass.""" no_grad: bool = False saved_values: Tuple[Any, ...] = () def save_for_backward(self, *values: Any) -> None: - "Store the given `values` if they need to be used during backpropagation." 
+ """Store the given `values` if they need to be used during backpropagation.""" if self.no_grad: return self.saved_values = values diff --git a/minitorch/module.py b/minitorch/module.py index 11fc1f39..01b6aac1 100644 --- a/minitorch/module.py +++ b/minitorch/module.py @@ -4,8 +4,7 @@ class Module: - """ - Modules form a tree that store parameters and other + """Modules form a tree that store parameters and other submodules. They make up the basis of neural network stacks. Attributes: @@ -25,35 +24,34 @@ def __init__(self) -> None: self.training = True def modules(self) -> Sequence[Module]: - "Return the direct child modules of this module." + """Return the direct child modules of this module.""" m: Dict[str, Module] = self.__dict__["_modules"] return list(m.values()) def train(self) -> None: - "Set the mode of this module and all descendent modules to `train`." + """Set the mode of this module and all descendent modules to `train`.""" raise NotImplementedError("Need to include this file from past assignment.") def eval(self) -> None: - "Set the mode of this module and all descendent modules to `eval`." + """Set the mode of this module and all descendent modules to `eval`.""" raise NotImplementedError("Need to include this file from past assignment.") def named_parameters(self) -> Sequence[Tuple[str, Parameter]]: - """ - Collect all the parameters of this module and its descendents. + """Collect all the parameters of this module and its descendents. Returns: The name and `Parameter` of each ancestor parameter. + """ raise NotImplementedError("Need to include this file from past assignment.") def parameters(self) -> Sequence[Parameter]: - "Enumerate over all the parameters of this module and its descendents." + """Enumerate over all the parameters of this module and its descendents.""" raise NotImplementedError("Need to include this file from past assignment.") def add_parameter(self, k: str, v: Any) -> Parameter: - """ - Manually add a parameter. 
Useful helper for scalar parameters. + """Manually add a parameter. Useful helper for scalar parameters. Args: k: Local name of the parameter. @@ -61,6 +59,7 @@ def add_parameter(self, k: str, v: Any) -> Parameter: Returns: Newly created parameter. + """ val = Parameter(v, k) self.__dict__["_parameters"][k] = val @@ -114,8 +113,7 @@ def _addindent(s_: str, numSpaces: int) -> str: class Parameter: - """ - A Parameter is a special container stored in a `Module`. + """A Parameter is a special container stored in a `Module`. It is designed to hold a `Variable`, but we allow it to hold any value for testing. @@ -130,7 +128,7 @@ def __init__(self, x: Any, name: Optional[str] = None) -> None: self.value.name = self.name def update(self, x: Any) -> None: - "Update the parameter value." + """Update the parameter value.""" self.value = x if hasattr(x, "requires_grad_"): self.value.requires_grad_(True) diff --git a/minitorch/operators.py b/minitorch/operators.py index 895ae82d..ef455ba0 100644 --- a/minitorch/operators.py +++ b/minitorch/operators.py @@ -1,6 +1,4 @@ -""" -Collection of the core mathematical operators used throughout the code base. 
-""" +"""Collection of the core mathematical operators used throughout the code base.""" import math from typing import Callable, Iterable @@ -11,48 +9,47 @@ def mul(x: float, y: float) -> float: - "$f(x, y) = x * y$" + """$f(x, y) = x * y$""" raise NotImplementedError("Need to include this file from past assignment.") def id(x: float) -> float: - "$f(x) = x$" + """$f(x) = x$""" raise NotImplementedError("Need to include this file from past assignment.") def add(x: float, y: float) -> float: - "$f(x, y) = x + y$" + """$f(x, y) = x + y$""" raise NotImplementedError("Need to include this file from past assignment.") def neg(x: float) -> float: - "$f(x) = -x$" + """$f(x) = -x$""" raise NotImplementedError("Need to include this file from past assignment.") def lt(x: float, y: float) -> float: - "$f(x) =$ 1.0 if x is less than y else 0.0" + """$f(x) =$ 1.0 if x is less than y else 0.0""" raise NotImplementedError("Need to include this file from past assignment.") def eq(x: float, y: float) -> float: - "$f(x) =$ 1.0 if x is equal to y else 0.0" + """$f(x) =$ 1.0 if x is equal to y else 0.0""" raise NotImplementedError("Need to include this file from past assignment.") def max(x: float, y: float) -> float: - "$f(x) =$ x if x is greater than y else y" + """$f(x) =$ x if x is greater than y else y""" raise NotImplementedError("Need to include this file from past assignment.") def is_close(x: float, y: float) -> float: - "$f(x) = |x - y| < 1e-2$" + """$f(x) = |x - y| < 1e-2$""" raise NotImplementedError("Need to include this file from past assignment.") def sigmoid(x: float) -> float: - r""" - $f(x) = \frac{1.0}{(1.0 + e^{-x})}$ + r"""$f(x) = \frac{1.0}{(1.0 + e^{-x})}$ (See https://en.wikipedia.org/wiki/Sigmoid_function ) @@ -66,8 +63,7 @@ def sigmoid(x: float) -> float: def relu(x: float) -> float: - """ - $f(x) =$ x if x is greater than 0, else 0 + """$f(x) =$ x if x is greater than 0, else 0 (See https://en.wikipedia.org/wiki/Rectifier_(neural_networks) .) 
""" @@ -78,32 +74,32 @@ def relu(x: float) -> float: def log(x: float) -> float: - "$f(x) = log(x)$" + """$f(x) = log(x)$""" return math.log(x + EPS) def exp(x: float) -> float: - "$f(x) = e^{x}$" + """$f(x) = e^{x}$""" return math.exp(x) def log_back(x: float, d: float) -> float: - r"If $f = log$ as above, compute $d \times f'(x)$" + r"""If $f = log$ as above, compute $d \times f'(x)$""" raise NotImplementedError("Need to include this file from past assignment.") def inv(x: float) -> float: - "$f(x) = 1/x$" + """$f(x) = 1/x$""" raise NotImplementedError("Need to include this file from past assignment.") def inv_back(x: float, d: float) -> float: - r"If $f(x) = 1/x$ compute $d \times f'(x)$" + r"""If $f(x) = 1/x$ compute $d \times f'(x)$""" raise NotImplementedError("Need to include this file from past assignment.") def relu_back(x: float, d: float) -> float: - r"If $f = relu$ compute $d \times f'(x)$" + r"""If $f = relu$ compute $d \times f'(x)$""" raise NotImplementedError("Need to include this file from past assignment.") @@ -113,8 +109,7 @@ def relu_back(x: float, d: float) -> float: def map(fn: Callable[[float], float]) -> Callable[[Iterable[float]], Iterable[float]]: - """ - Higher-order map. + """Higher-order map. 
See https://en.wikipedia.org/wiki/Map_(higher-order_function) @@ -124,20 +119,20 @@ def map(fn: Callable[[float], float]) -> Callable[[Iterable[float]], Iterable[fl Returns: A function that takes a list, applies `fn` to each element, and returns a new list + """ raise NotImplementedError("Need to include this file from past assignment.") def negList(ls: Iterable[float]) -> Iterable[float]: - "Use `map` and `neg` to negate each element in `ls`" + """Use `map` and `neg` to negate each element in `ls`""" raise NotImplementedError("Need to include this file from past assignment.") def zipWith( - fn: Callable[[float, float], float] + fn: Callable[[float, float], float], ) -> Callable[[Iterable[float], Iterable[float]], Iterable[float]]: - """ - Higher-order zipwith (or map2). + """Higher-order zipwith (or map2). See https://en.wikipedia.org/wiki/Map_(higher-order_function) @@ -153,15 +148,14 @@ def zipWith( def addLists(ls1: Iterable[float], ls2: Iterable[float]) -> Iterable[float]: - "Add the elements of `ls1` and `ls2` using `zipWith` and `add`" + """Add the elements of `ls1` and `ls2` using `zipWith` and `add`""" raise NotImplementedError("Need to include this file from past assignment.") def reduce( fn: Callable[[float, float], float], start: float ) -> Callable[[Iterable[float]], float]: - r""" - Higher-order reduce. + r"""Higher-order reduce. Args: fn: combine two values @@ -171,15 +165,16 @@ def reduce( Function that takes a list `ls` of elements $x_1 \ldots x_n$ and computes the reduction :math:`fn(x_3, fn(x_2, fn(x_1, x_0)))` + """ raise NotImplementedError("Need to include this file from past assignment.") def sum(ls: Iterable[float]) -> float: - "Sum up a list using `reduce` and `add`." + """Sum up a list using `reduce` and `add`.""" raise NotImplementedError("Need to include this file from past assignment.") def prod(ls: Iterable[float]) -> float: - "Product of a list using `reduce` and `mul`." 
+ """Product of a list using `reduce` and `mul`.""" raise NotImplementedError("Need to include this file from past assignment.") diff --git a/minitorch/scalar.py b/minitorch/scalar.py index 3c853a2e..7d1e9ca0 100644 --- a/minitorch/scalar.py +++ b/minitorch/scalar.py @@ -7,17 +7,9 @@ from .autodiff import Context, Variable, backpropagate, central_difference from .scalar_functions import ( - EQ, - LT, - Add, - Exp, Inv, - Log, Mul, - Neg, - ReLU, ScalarFunction, - Sigmoid, ) ScalarLike = Union[float, int, "Scalar"] @@ -25,8 +17,7 @@ @dataclass class ScalarHistory: - """ - `ScalarHistory` stores the history of `Function` operations that was + """`ScalarHistory` stores the history of `Function` operations that was used to construct the current Variable. Attributes: @@ -48,8 +39,7 @@ class ScalarHistory: class Scalar: - """ - A reimplementation of scalar values for autodifferentiation + """A reimplementation of scalar values for autodifferentiation tracking. Scalar Variables behave as close as possible to standard Python numbers while also tracking the operations that led to the number's creation. They can only be manipulated by @@ -133,12 +123,12 @@ def relu(self) -> Scalar: # Variable elements for backprop def accumulate_derivative(self, x: Any) -> None: - """ - Add `val` to the the derivative accumulated on this variable. + """Add `val` to the the derivative accumulated on this variable. Should only be called during autodifferentiation on leaf variables. Args: x: value to be accumulated + """ assert self.is_leaf(), "Only leaf variables can have derivatives." 
if self.derivative is None: @@ -146,7 +136,7 @@ def accumulate_derivative(self, x: Any) -> None: self.derivative += x def is_leaf(self) -> bool: - "True if this variable created by the user (no `last_fn`)" + """True if this variable created by the user (no `last_fn`)""" return self.history is not None and self.history.last_fn is None def is_constant(self) -> bool: @@ -166,12 +156,12 @@ def chain_rule(self, d_output: Any) -> Iterable[Tuple[Variable, Any]]: raise NotImplementedError("Need to include this file from past assignment.") def backward(self, d_output: Optional[float] = None) -> None: - """ - Calls autodiff to fill in the derivatives for the history of this object. + """Calls autodiff to fill in the derivatives for the history of this object. Args: d_output (number, opt): starting derivative to backpropagate through the model (typically left out, and assumed to be 1.0). + """ if d_output is None: d_output = 1.0 @@ -179,13 +169,14 @@ def backward(self, d_output: Optional[float] = None) -> None: def derivative_check(f: Any, *scalars: Scalar) -> None: - """ - Checks that autodiff works on a python function. + """Checks that autodiff works on a python function. Asserts False if derivative is incorrect. - Parameters: + Parameters + ---------- f : function from n-scalars to 1-scalar. *scalars : n input scalar values. + """ out = f(*scalars) out.backward() diff --git a/minitorch/tensor.py b/minitorch/tensor.py index c675699b..05179917 100644 --- a/minitorch/tensor.py +++ b/minitorch/tensor.py @@ -1,6 +1,4 @@ -""" -Implementation of the core Tensor object for autodifferentiation. -""" +"""Implementation of the core Tensor object for autodifferentiation.""" from __future__ import annotations @@ -47,8 +45,7 @@ @dataclass class History: - """ - `History` stores the history of `Function` operations that was + """`History` stores the history of `Function` operations that was used to construct the current Variable. 
""" @@ -61,8 +58,7 @@ class History: class Tensor: - """ - Tensor is a generalization of Scalar in that it is a Variable that + """Tensor is a generalization of Scalar in that it is a Variable that handles multidimensional arrays. """ @@ -103,39 +99,39 @@ def requires_grad(self) -> bool: return self.history is not None def to_numpy(self) -> npt.NDArray[np.float64]: - """ - Returns: - Converted to numpy array + """Returns: + Converted to numpy array + """ return self.contiguous()._tensor._storage.reshape(self.shape) # Properties @property def shape(self) -> UserShape: - """ - Returns: - shape of the tensor + """Returns: + shape of the tensor + """ return self._tensor.shape @property def size(self) -> int: - """ - Returns: - int : size of the tensor + """Returns: + int : size of the tensor + """ return self._tensor.size @property def dims(self) -> int: - """ - Returns: - int : dimensionality of the tensor + """Returns: + int : dimensionality of the tensor + """ return self._tensor.dims def _ensure_tensor(self, b: TensorLike) -> Tensor: - "Turns a python number into a tensor with the same backend." 
+ """Turns a python number into a tensor with the same backend.""" if isinstance(b, (int, float)): c = Tensor.make([b], (1,), backend=self.backend) else: @@ -160,7 +156,7 @@ def __rtruediv__(self, b: TensorLike) -> Tensor: return Mul.apply(self._ensure_tensor(b), Inv.apply(self)) def __matmul__(self, b: Tensor) -> Tensor: - "Not used until Module 3" + """Not used until Module 3""" return MatMul.apply(self, b) def __lt__(self, b: TensorLike) -> Tensor: @@ -207,29 +203,29 @@ def item(self) -> float: return self[0] def sum(self, dim: Optional[int] = None) -> Tensor: - "Compute the sum over dimension `dim`" + """Compute the sum over dimension `dim`""" if dim is None: return Sum.apply(self.contiguous().view(self.size), self._ensure_tensor(0)) else: return Sum.apply(self, self._ensure_tensor(dim)) def mean(self, dim: Optional[int] = None) -> Tensor: - "Compute the mean over dimension `dim`" + """Compute the mean over dimension `dim`""" if dim is not None: return self.sum(dim) / self.shape[dim] else: return self.sum() / self.size def permute(self, *order: int) -> Tensor: - "Permute tensor dimensions to *order" + """Permute tensor dimensions to *order""" return Permute.apply(self, tensor(list(order))) def view(self, *shape: int) -> Tensor: - "Change the shape of the tensor to a new shape with the same size" + """Change the shape of the tensor to a new shape with the same size""" return View.apply(self, tensor(list(shape))) def contiguous(self) -> Tensor: - "Return a contiguous tensor with the same data" + """Return a contiguous tensor with the same data""" return Copy.apply(self) def __repr__(self) -> str: @@ -259,24 +255,24 @@ def make( strides: Optional[UserStrides] = None, backend: Optional[TensorBackend] = None, ) -> Tensor: - "Create a new tensor from data" + """Create a new tensor from data""" return Tensor(TensorData(storage, shape, strides), backend=backend) def expand(self, other: Tensor) -> Tensor: - """ - Method used to allow for backprop over broadcasting. 
+ """Method used to allow for backprop over broadcasting. This method is called when the output of `backward` is a different size than the input of `forward`. - Parameters: + Parameters + ---------- other : backward tensor (must broadcast with self) - Returns: + Returns + ------- Expanded version of `other` with the right derivatives """ - # Case 1: Both the same shape. if self.shape == other.shape: return other @@ -321,12 +317,12 @@ def detach(self) -> Tensor: # Variable elements for backprop def accumulate_derivative(self, x: Any) -> None: - """ - Add `val` to the the derivative accumulated on this variable. + """Add `x` to the derivative accumulated on this variable. Should only be called during autodifferentiation on leaf variables. Args: x : value to be accumulated + """ assert self.is_leaf(), "Only leaf variables can have derivatives." if self.grad is None: @@ -336,7 +332,7 @@ def accumulate_derivative(self, x: Any) -> None: self.grad += x def is_leaf(self) -> bool: - "True if this variable created by the user (no `last_fn`)" + """True if this variable was created by the user (no `last_fn`)""" return self.history is not None and self.history.last_fn is None def is_constant(self) -> bool: @@ -367,7 +363,5 @@ def backward(self, grad_output: Optional[Tensor] = None) -> None: backpropagate(self, grad_output) def zero_grad_(self) -> None: # pragma: no cover - """ - Reset the derivative on this variable. - """ + """Reset the derivative on this variable.""" self.grad = None diff --git a/minitorch/tensor_data.py b/minitorch/tensor_data.py index 452b7904..22b42400 100644 --- a/minitorch/tensor_data.py +++ b/minitorch/tensor_data.py @@ -15,7 +15,8 @@ class IndexingError(RuntimeError): - "Exception raised for indexing errors."
+ """Exception raised for indexing errors.""" + pass @@ -31,8 +32,7 @@ class IndexingError(RuntimeError): def index_to_position(index: Index, strides: Strides) -> int: - """ - Converts a multidimensional tensor `index` into a single-dimensional position in + """Converts a multidimensional tensor `index` into a single-dimensional position in storage based on strides. Args: @@ -41,15 +41,14 @@ def index_to_position(index: Index, strides: Strides) -> int: Returns: Position in storage - """ + """ # TODO: Implement for Task 2.1. raise NotImplementedError("Need to implement for Task 2.1") def to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None: - """ - Convert an `ordinal` to an index in the `shape`. + """Convert an `ordinal` to an index in the `shape`. Should ensure that enumerating position 0 ... size of a tensor produces every index exactly once. It may not be the inverse of `index_to_position`. @@ -67,8 +66,7 @@ def to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None: def broadcast_index( big_index: Index, big_shape: Shape, shape: Shape, out_index: OutIndex ) -> None: - """ - Convert a `big_index` into `big_shape` to a smaller `out_index` + """Convert a `big_index` into `big_shape` to a smaller `out_index` into `shape` following broadcasting rules. In this case it may be larger or with more dimensions than the `shape` given. Additional dimensions may need to be mapped to 0 or @@ -82,14 +80,14 @@ def broadcast_index( Returns: None + """ # TODO: Implement for Task 2.2. raise NotImplementedError("Need to implement for Task 2.2") def shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape: - """ - Broadcast two shapes to create a new union shape. + """Broadcast two shapes to create a new union shape. Args: shape1 : first shape @@ -100,6 +98,7 @@ def shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape: Raises: IndexingError : if cannot broadcast + """ # TODO: Implement for Task 2.2. 
raise NotImplementedError("Need to implement for Task 2.2") @@ -153,11 +152,11 @@ def to_cuda_(self) -> None: # pragma: no cover self._storage = numba.cuda.to_device(self._storage) def is_contiguous(self) -> bool: - """ - Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions. + """Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions. Returns: bool : True if contiguous + """ last = 1e9 for stride in self._strides: @@ -209,18 +208,18 @@ def tuple(self) -> Tuple[Storage, Shape, Strides]: return (self._storage, self._shape, self._strides) def permute(self, *order: int) -> TensorData: - """ - Permute the dimensions of the tensor. + """Permute the dimensions of the tensor. Args: order (list): a permutation of the dimensions Returns: New `TensorData` with the same storage and a new dimension order. + """ - assert list(sorted(order)) == list( - range(len(self.shape)) - ), f"Must give a position to each dimension. Shape: {self.shape} Order: {order}" + assert list(sorted(order)) == list(range(len(self.shape))), ( + f"Must give a position to each dimension. Shape: {self.shape} Order: {order}" + ) # TODO: Implement for Task 2.1. raise NotImplementedError("Need to implement for Task 2.1") diff --git a/minitorch/tensor_functions.py b/minitorch/tensor_functions.py index 86db01a1..3c63b0df 100644 --- a/minitorch/tensor_functions.py +++ b/minitorch/tensor_functions.py @@ -1,6 +1,4 @@ -""" -Implementation of the autodifferentiation Functions for Tensor. 
-""" +"""Implementation of the autodifferentiation Functions for Tensor.""" from __future__ import annotations @@ -23,7 +21,7 @@ def wrap_tuple(x): # type: ignore - "Turn a possible value into a tuple" + """Turn a possible value into a tuple""" if isinstance(x, tuple): return x return (x,) @@ -274,8 +272,7 @@ def transpose(a: Tensor) -> Tensor: # Helpers for Constructing tensors def zeros(shape: UserShape, backend: TensorBackend = SimpleBackend) -> Tensor: - """ - Produce a zero tensor of size `shape`. + """Produce a zero tensor of size `shape`. Args: shape : shape of tensor @@ -283,6 +280,7 @@ def zeros(shape: UserShape, backend: TensorBackend = SimpleBackend) -> Tensor: Returns: new tensor + """ return minitorch.Tensor.make( [0] * int(operators.prod(shape)), shape, backend=backend @@ -294,8 +292,7 @@ def rand( backend: TensorBackend = SimpleBackend, requires_grad: bool = False, ) -> Tensor: - """ - Produce a random tensor of size `shape`. + """Produce a random tensor of size `shape`. Args: shape : shape of tensor @@ -304,6 +301,7 @@ def rand( Returns: :class:`Tensor` : new tensor + """ vals = [random.random() for _ in range(int(operators.prod(shape)))] tensor = minitorch.Tensor.make(vals, shape, backend=backend) @@ -317,8 +315,7 @@ def _tensor( backend: TensorBackend = SimpleBackend, requires_grad: bool = False, ) -> Tensor: - """ - Produce a tensor with data ls and shape `shape`. + """Produce a tensor with data ls and shape `shape`. 
Args: ls: data for tensor @@ -328,6 +325,7 @@ def _tensor( Returns: new tensor + """ tensor = minitorch.Tensor.make(ls, shape, backend=backend) tensor.requires_grad_(requires_grad) @@ -337,8 +335,7 @@ def _tensor( def tensor( ls: Any, backend: TensorBackend = SimpleBackend, requires_grad: bool = False ) -> Tensor: - """ - Produce a tensor with data and shape from ls + """Produce a tensor with data and shape from ls Args: ls: data for tensor @@ -347,6 +344,7 @@ def tensor( Returns: :class:`Tensor` : new tensor + """ def shape(ls: Any) -> List[int]: diff --git a/minitorch/tensor_ops.py b/minitorch/tensor_ops.py index 96411b42..9683d144 100644 --- a/minitorch/tensor_ops.py +++ b/minitorch/tensor_ops.py @@ -2,26 +2,20 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Type -import numpy as np from typing_extensions import Protocol from . import operators from .tensor_data import ( - MAX_DIMS, - broadcast_index, - index_to_position, shape_broadcast, - to_index, ) if TYPE_CHECKING: from .tensor import Tensor - from .tensor_data import Index, Shape, Storage, Strides + from .tensor_data import Shape, Storage, Strides class MapProto(Protocol): - def __call__(self, x: Tensor, out: Optional[Tensor] = ..., /) -> Tensor: - ... + def __call__(self, x: Tensor, out: Optional[Tensor] = ..., /) -> Tensor: ... class TensorOps: @@ -52,8 +46,7 @@ def matrix_multiply(a: Tensor, b: Tensor) -> Tensor: class TensorBackend: def __init__(self, ops: Type[TensorOps]): - """ - Dynamically construct a tensor backend based on a `tensor_ops` object + """Dynamically construct a tensor backend based on a `tensor_ops` object that implements map, zip, and reduce higher-order functions. 
Args: @@ -64,7 +57,6 @@ def __init__(self, ops: Type[TensorOps]): A collection of tensor functions """ - # Maps self.neg_map = ops.map(operators.neg) self.sigmoid_map = ops.map(operators.sigmoid) @@ -95,8 +87,7 @@ def __init__(self, ops: Type[TensorOps]): class SimpleOps(TensorOps): @staticmethod def map(fn: Callable[[float], float]) -> MapProto: - """ - Higher-order tensor map function :: + """Higher-order tensor map function :: fn_map = map(fn) fn_map(a, out) @@ -122,8 +113,8 @@ def map(fn: Callable[[float], float]) -> MapProto: Returns: new tensor data - """ + """ f = tensor_map(fn) def ret(a: Tensor, out: Optional[Tensor] = None) -> Tensor: @@ -136,10 +127,9 @@ def ret(a: Tensor, out: Optional[Tensor] = None) -> Tensor: @staticmethod def zip( - fn: Callable[[float, float], float] + fn: Callable[[float, float], float], ) -> Callable[["Tensor", "Tensor"], "Tensor"]: - """ - Higher-order tensor zip function :: + """Higher-order tensor zip function :: fn_zip = zip(fn) out = fn_zip(a, b) @@ -164,8 +154,8 @@ def zip( Returns: :class:`TensorData` : new tensor data - """ + """ f = tensor_zip(fn) def ret(a: "Tensor", b: "Tensor") -> "Tensor": @@ -183,8 +173,7 @@ def ret(a: "Tensor", b: "Tensor") -> "Tensor": def reduce( fn: Callable[[float, float], float], start: float = 0.0 ) -> Callable[["Tensor", int], "Tensor"]: - """ - Higher-order tensor reduce function. :: + """Higher-order tensor reduce function. :: fn_reduce = reduce(fn) out = fn_reduce(a, dim) @@ -204,6 +193,7 @@ def reduce( Returns: :class:`TensorData` : new tensor + """ f = tensor_reduce(fn) @@ -231,8 +221,7 @@ def matrix_multiply(a: "Tensor", b: "Tensor") -> "Tensor": def tensor_map(fn: Callable[[float], float]) -> Any: - """ - Low-level implementation of tensor map between + """Low-level implementation of tensor map between tensors with *possibly different strides*. 
Simple version: @@ -258,6 +247,7 @@ def tensor_map(fn: Callable[[float], float]) -> Any: Returns: None : Fills in `out` + """ def _map( @@ -275,8 +265,7 @@ def _map( def tensor_zip(fn: Callable[[float, float], float]) -> Any: - """ - Low-level implementation of tensor zip between + """Low-level implementation of tensor zip between tensors with *possibly different strides*. Simple version: @@ -305,6 +294,7 @@ def tensor_zip(fn: Callable[[float, float], float]) -> Any: Returns: None : Fills in `out` + """ def _zip( @@ -325,8 +315,7 @@ def _zip( def tensor_reduce(fn: Callable[[float, float], float]) -> Any: - """ - Low-level implementation of tensor reduce. + """Low-level implementation of tensor reduce. * `out_shape` will be the same as `a_shape` except with `reduce_dim` turned to size `1` @@ -343,6 +332,7 @@ def tensor_reduce(fn: Callable[[float, float], float]) -> Any: Returns: None : Fills in `out` + """ def _reduce( diff --git a/module2-modernization.md b/module2-modernization.md new file mode 100644 index 00000000..24041047 --- /dev/null +++ b/module2-modernization.md @@ -0,0 +1,191 @@ +# Module-2 Modernization Plan + +## Overview +This document outlines the step-by-step modernization of Module-2 to match the modern Python development practices implemented in Module-0 and successfully applied to Module-1. + +## Current State Analysis + +### Legacy Files Present +- `setup.py` - Minimal setup file (contains only `py_modules=[]`) +- `setup.cfg` - Contains tool configurations and version 0.4 +- `requirements.txt` - Legacy dependency management +- `requirements.extra.txt` - Additional dependencies +- No modern packaging or CI/CD infrastructure + +### Module-2 Specific Considerations +- **Version**: Currently 0.4, should update to 0.6 (Module-0: 0.4, Module-1: 0.5) +- **Tasks**: Only task2_* tests (task2_1, task2_2, task2_3, task2_4, task2_5) +- **Dependencies**: More extensive than Module-1 (includes streamlit, plotly, torch, etc.) 
+- **Inherited Tasks**: task0_*, task1_* synced from previous modules, not tested in CI + +## Modernization Steps Required + +### ✅ Step 0: Analysis and Planning (COMPLETED) +- [x] Read Module-1 modernization documentation +- [x] Analyze Module-2 current structure and dependencies +- [x] Create Module2.md assignment summary +- [x] Create this modernization plan + +### 📋 Step 1: Create Modern pyproject.toml +**Action**: Migrate from legacy setup.py + requirements.txt to modern pyproject.toml +- **Create**: `Module-2/pyproject.toml` +- **Version**: Update from 0.4 to 0.6 +- **Dependencies**: Migrate and update all packages with modern versions +- **Build system**: Use `hatchling` backend (consistent with Module-0/1) +- **Configuration**: Add Ruff, Pyright, and pytest configurations + +**Dependency Migration Strategy**: +- **Base requirements.txt**: colorama, hypothesis, mypy, numba, numpy, pre-commit, pytest, pytest-env, pytest-runner, typing_extensions +- **Extra requirements.txt**: datasets, embeddings, networkx, plotly, pydot, python-mnist, streamlit, streamlit-ace, torch, watchdog +- **Version Updates**: Follow Module-1 pattern for common dependencies +- **New Dependencies**: Preserve Module-2 specific packages (streamlit, plotly, etc.) 
+ +### 📋 Step 2: Remove Legacy setup.py +**Action**: Remove minimal setup.py file +- **Remove**: `Module-2/setup.py` +- **Reason**: Replaced by pyproject.toml modern packaging + +### 📋 Step 3: Remove Legacy Requirements Files +**Action**: Remove both requirements files after verifying all dependencies migrated +- **Remove**: `Module-2/requirements.txt` +- **Remove**: `Module-2/requirements.extra.txt` +- **Verification**: Ensure all dependencies properly migrated to pyproject.toml + +### 📋 Step 4: Remove/Modernize setup.cfg +**Action**: Remove setup.cfg after migrating relevant configurations +- **Remove**: `Module-2/setup.cfg` +- **Migration**: Move any necessary configurations to pyproject.toml +- **Note**: Tool configurations (flake8, mypy, black, isort, darglint) will be replaced by modern Ruff/Pyright + +### 📋 Step 5: Create Modern Pre-commit Configuration +**Action**: Create modern pre-commit hooks to match Module-0/1 +- **Create**: `Module-2/.pre-commit-config.yaml` +- **Content**: Copy exactly from Module-1 (modern toolchain) +- **Tools**: Use Ruff (replaces Black/Flake8/isort) and Pyright (replaces mypy) + +### 📋 Step 6: Create GitHub Actions CI/CD +**Action**: Add automated testing and grading infrastructure +- **Create**: `Module-2/.github/workflows/classroom.yaml` +- **Content**: Copy from Module-1 (GitHub Classroom integration) + +### 📋 Step 7: Create Module-2 Specific Autograding +**Action**: Create autograding configuration for Module-2 tasks only +- **Create**: `Module-2/.github/classroom/autograding.json` +- **Tests**: Only task2_* tests (task2_1, task2_2, task2_3, task2_4, task2_5) + Style check +- **Reasoning**: Module-2 syncs task0_*, task1_* from previous modules, only tests its own tasks + +### 📋 Step 8: Create Documentation Files +**Action**: Create modern documentation matching Module-1 +- **Create**: `Module-2/installation.md` (copy from Module-1, generic content) +- **Create**: `Module-2/testing.md` (adapt for Module-2 specific tasks) +- 
**Update**: `Module-2/README.md` (modernize and improve) + +### 📋 Step 9: Verification and Testing +**Action**: Ensure modernized setup works correctly +- **Test**: `pip install -e ".[dev,extra]"` installation +- **Test**: `pre-commit run --all-files` functionality +- **Test**: Module-2 specific tasks (pytest -m task2_1, task2_2, etc.) +- **Test**: Import verification (`python -c "import minitorch; print('Success!')"`) + +## Module-2 Specific Dependency Analysis + +### Core Dependencies (requirements.txt) +``` +colorama==0.4.3 → 0.4.6 (update to match Module-1) +hypothesis==6.54 → 6.138.2 (update to match Module-1) +mypy==0.971 → Remove (replace with Pyright) +numba==0.56 → 0.61.2 (update to match Module-1) +numpy==1.22 → >=1.24,<2.3 (update to match Module-1) +pre-commit==2.20.0 → 4.3.0 (move to [dev] group) +pytest==7.1.2 → 8.4.1 (update to match Module-1) +pytest-env → Keep (Module-2 specific) +pytest-runner==5.2 → Remove (not needed with modern packaging) +typing_extensions → Keep (version spec from Module-1) +``` + +### Extra Dependencies (requirements.extra.txt) +``` +datasets==2.4.0 → Keep/Update (Module-2 specific) +embeddings==0.0.8 → Keep (Module-2 specific) +networkx==2.4 → Keep/Update (Module-2 specific) +plotly==4.14.3 → Keep/Update (Module-2 specific) +pydot==1.4.1 → Keep/Update (Module-2 specific) +python-mnist → Keep (Module-2 specific) +streamlit==1.12.0 → Keep/Update (Module-2 specific) +streamlit-ace → Keep (Module-2 specific) +torch → Keep torch==2.8.0 (match Module-1) +watchdog==1.0.2 → Keep/Update (Module-2 specific) +``` + +## Task Structure Analysis + +### Module-2 Test Tasks +- `task2_1`: Tensor data and indexing (test_tensor_data.py) +- `task2_2`: Tensor broadcasting (test_tensor_data.py) +- `task2_3`: Tensor operations (test_tensor.py) +- `task2_4`: Tensor autodifferentiation (test_tensor.py) +- `task2_5`: Training implementation (no specific tests, validation through training) + +### Inherited Tasks +- `task0_*`: Synced from Module-0 via 
sync_previous_module.py +- `task1_*`: Synced from Module-1 via sync_previous_module.py +- **Note**: Not tested in Module-2 CI, only task2_* tests run + +## Expected File Structure After Modernization + +``` +Module-2/ +├── pyproject.toml # ✅ NEW: Modern packaging +├── installation.md # ✅ NEW: Installation guide +├── testing.md # ✅ NEW: Testing documentation +├── module2-modernization.md # ✅ NEW: This planning document +├── Module2.md # ✅ NEW: Assignment summary +├── README.md # ✅ UPDATED: Modern documentation +├── .pre-commit-config.yaml # ✅ NEW: Modern tools +├── .github/ # ✅ NEW: CI/CD infrastructure +│ ├── workflows/ +│ │ └── classroom.yaml # ✅ NEW: GitHub Actions +│ └── classroom/ +│ └── autograding.json # ✅ NEW: Module-2 specific tests +├── setup.py # ❌ REMOVED: Legacy +├── setup.cfg # ❌ REMOVED: Legacy +├── requirements.txt # ❌ REMOVED: Legacy +└── requirements.extra.txt # ❌ REMOVED: Legacy +``` + +## Benefits Expected +- **Performance**: Ruff is significantly faster than Black+Flake8+isort +- **Consistency**: Unified toolchain matching Module-0 and Module-1 +- **Modern**: Uses current best practices for Python packaging +- **Automated**: CI/CD pipeline for continuous testing +- **Type Safety**: Better type checking with Pyright +- **Maintainability**: Single configuration file (pyproject.toml) +- **Documentation**: Clear installation and testing guides + +## Testing Commands (Post-Modernization) +```bash +# Install in development mode +pip install -e ".[dev,extra]" + +# Run pre-commit +pre-commit run --all-files + +# Test module-specific tasks +pytest -m task2_1 +pytest -m task2_2 +pytest -m task2_3 +pytest -m task2_4 + +# Verify import +python -c "import minitorch; print('Success!')" + +# Run training (Task 2.5) +python project/run_tensor.py +``` + +## Next Steps +After completing this modernization plan: +1. Execute steps 1-9 in sequence +2. Test all functionality thoroughly +3. Document any Module-2 specific issues encountered +4. 
Use this as template for future module modernizations (Module-3, Module-4) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..19ba3991 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,192 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "minitorch" +version = "0.6" +description = "A minimal deep learning library for educational purposes" +requires-python = ">=3.8" +dependencies = [ + "colorama==0.4.6", + "hypothesis==6.138.2", + "numba==0.61.2", + "numpy>=1.24,<2.3", + "pytest==8.4.1", + "pytest-env==1.1.5", + "typing_extensions", +] + +[project.optional-dependencies] +dev = [ + "pre-commit==4.3.0", +] +extra = [ + "datasets==2.4.0", + "embeddings==0.0.8", + "networkx==3.5", + "plotly==5.24.1", + "pydot==1.4.1", + "python-mnist", + "streamlit==1.48.1", + "streamlit-ace", + "torch==2.8.0", + "watchdog==1.0.2", +] + +[tool.pyright] +include = ["**/minitorch"] +exclude = [ + "**/docs", + "**/docs/module2/**", + "**/assignments", + "**/project", + "**/mt_diagrams", + "**/.*", + "*chainrule.py*", + "**/minitorch/autodiff.py", + "sync_previous_module.py", +] +venvPath = "." 
+venv = ".venv" +reportUnknownMemberType = "none" +reportUnknownParameterType = "none" +reportUnknownArgumentType = "none" +reportUnknownVariableType = "none" +reportMissingTypeArgument = "none" +reportMissingTypeStubs = "none" +reportUnusedExpression = "none" +reportUnknownLambdaType = "none" +reportIncompatibleMethodOverride = "none" +reportPrivateUsage = "none" +reportMissingParameterType = "error" +reportMissingImports = "none" + +[tool.pytest.ini_options] +markers = [ + "task0_0", + "task0_1", + "task0_2", + "task0_3", + "task0_4", + "task1_0", + "task1_1", + "task1_2", + "task1_3", + "task1_4", + "task2_0", + "task2_1", + "task2_2", + "task2_3", + "task2_4", + "task3_0", + "task3_1", + "task3_2", + "task3_3", + "task3_4", + "task4_0", + "task4_1", + "task4_2", + "task4_3", + "task4_4", +] +[tool.ruff] +exclude = [ + ".git", + "__pycache__", + "**/docs/slides/*", + "old", "build", + "dist", + "**/project/**/*", + "**/mt_diagrams/*", + "**/minitorch/testing.py", + "**/docs/**/*", + "minitorch/optim.py", + "minitorch/datasets.py", + "minitorch/scalar.py", + "minitorch/autodiff.py", + "minitorch/module.py", + "minitorch/tensor.py", + "minitorch/tensor_data.py", + "minitorch/tensor_functions.py", + "minitorch/tensor_ops.py", + "sync_previous_module.py", +] + +[tool.ruff.lint] +ignore = [ + "ANN401", + "N801", + "E203", + "E266", + "E501", + "E741", + "N803", + "N802", + "N806", + "D400", + "D401", + "D105", + "D415", + "D402", + "D205", + "D100", + "D101", + "D107", + "D213", + "ANN204", + "D203" +] +select = ["D", "E", "F", "N", "ANN"] +fixable = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "I", + "N", + "Q", + "S", + "T", + "W", + "ANN", + "ARG", + "BLE", + "COM", + "DJ", + "DTZ", + "EM", + "ERA", + "EXE", + "FBT", + "ICN", + "INP", + "ISC", + "NPY", + "PD", + "PGH", + "PIE", + "PL", + "PT", + "PTH", + "PYI", + "RET", + "RSE", + "RUF", + "SIM", + "SLF", + "TCH", + "TID", + "TRY", + "UP", + "YTT", +] +unfixable = [] +
+[tool.ruff.lint.extend-per-file-ignores] +"tests/**/*.py" = ["D"] +"minitorch/scalar_functions.py" = ["ANN001", "ANN201"] +"minitorch/tensor_functions.py" = ["ANN001", "ANN201"] \ No newline at end of file diff --git a/requirements.extra.txt b/requirements.extra.txt deleted file mode 100644 index a3df13c6..00000000 --- a/requirements.extra.txt +++ /dev/null @@ -1,10 +0,0 @@ -datasets==2.4.0 -embeddings==0.0.8 -networkx==2.4 -plotly==4.14.3 -pydot==1.4.1 -python-mnist -streamlit==1.12.0 -streamlit-ace -torch -watchdog==1.0.2 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 40efede3..00000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -colorama==0.4.3 -hypothesis == 6.54 -mypy == 0.971 -numba == 0.56 -numpy == 1.22 -pre-commit == 2.20.0 -pytest == 7.1.2 -pytest-env -pytest-runner == 5.2 -typing_extensions diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 8a5fbd84..00000000 --- a/setup.cfg +++ /dev/null @@ -1,61 +0,0 @@ -[metadata] -name=minitorch -version=0.4 - -[files] -packages = - minitorch - -[darglint] -ignore_regex=((^_(.*))|(.*map)|(.*zip)|(.*reduce)|(test.*)|(tensor_.*)) -docstring_style=google -strictness=long - -[flake8] -ignore = N801, E203, E266, E501, W503, F812, E741, N803, N802, N806 -exclude = .git,__pycache__,docs/*,old,build,dist - -[isort] -profile=black -src_paths=minitorch,test - -[mypy] -strict = True -ignore_missing_imports = True -exclude=^(docs/)|(project/)|(assignments/) -implicit_reexport = True - -[mypy-tests.*] -disallow_untyped_decorators = False -implicit_reexport = True - -[black] -exclude=^(docs/)|(project/)|(assignments/) - -[tool:pytest] -markers = - task0_0 - task0_1 - task0_2 - task0_3 - task0_4 - task1_0 - task1_1 - task1_2 - task1_3 - task1_4 - task2_0 - task2_1 - task2_2 - task2_3 - task2_4 - task3_0 - task3_1 - task3_2 - task3_3 - task3_4 - task4_0 - task4_1 - task4_2 - task4_3 - task4_4 diff --git a/setup.py b/setup.py deleted file mode 100644 index ff4cfa9f..00000000 
--- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup(py_modules=[]) diff --git a/sync_previous_module.py b/sync_previous_module.py index 5f682c89..2bbce42b 100644 --- a/sync_previous_module.py +++ b/sync_previous_module.py @@ -1,46 +1,72 @@ """ -Description: -Note: Make sure that both the new and old module files are in same directory! +Sync Previous Module Files -This script helps you sync your previous module works with current modules. -It takes 2 arguments, source_dir_name and destination_dir_name. -All the files which will be moved are specified in files_to_sync.txt as newline separated strings +This script helps you sync files from your previous module to the current module. +It copies files specified in 'files_to_sync.txt' from the source directory to the destination directory. -Usage: python sync_previous_module.py +Usage: python sync_previous_module.py <source_dir> <dest_dir> -Ex: python sync_previous_module.py mle-module-0-sauravpanda24 mle-module-1-sauravpanda24 +Examples: + python sync_previous_module.py ./my-awesome-module-1 ./my-awesome-module-2 + python sync_previous_module.py ~/assignments/Module-1-unicorn_ninja ~/assignments/Module-2-unicorn_ninja """ import os import shutil import sys -if len(sys.argv) != 3: - print('Invalid argument count!
Please pass source directory and destination directory after the file name') - sys.exit() +def print_usage(): + """Print usage information and examples.""" + print(__doc__) -# Get the users path to evaluate the username and root directory -current_path = os.getcwd() -grandparent_path = '/'.join(current_path.split('/')[:-1]) +def read_files_to_sync(): + """Read the list of files to sync from files_to_sync.txt""" + try: + with open("files_to_sync.txt", "r") as f: + return f.read().splitlines() + except FileNotFoundError: + print("Error: files_to_sync.txt not found!") + sys.exit(1) -print('Looking for modules in : ', grandparent_path) +def sync_files(source, dest, files_to_move): + """Copy files from source to destination directory.""" + if not os.path.exists(source): + print(f"Error: Source directory '{source}' does not exist!") + sys.exit(1) -# List of files which we want to move -f = open('files_to_sync.txt', 'r+') -files_to_move = f.read().splitlines() -f.close() + if not os.path.exists(dest): + print(f"Error: Destination directory '{dest}' does not exist!") + sys.exit(1) -# get the source and destination from arguments -source = sys.argv[1] -dest = sys.argv[2] - -# copy the files from source to destination -try: + copied_files = 0 for file in files_to_move: - print(f"Moving file : {file}") - shutil.copy( - os.path.join(grandparent_path, source, file), - os.path.join(grandparent_path, dest, file), - ) - print(f"Finished moving {len(files_to_move)} files") -except Exception: - print("Something went wrong! 
please check if the source and destination folders are present in same folder") + source_path = os.path.join(source, file) + dest_path = os.path.join(dest, file) + + if not os.path.exists(source_path): + print(f"Warning: File '{file}' not found in source directory, skipping") + continue + + try: + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + shutil.copy(source_path, dest_path) + print(f"Copied: {file}") + copied_files += 1 + except Exception as e: + print(f"Error copying '{file}': {e}") + + print(f"Finished copying {copied_files} files") + +def main(): + if len(sys.argv) != 3: + print("Error: Invalid number of arguments!") + print_usage() + sys.exit(1) + + source = sys.argv[1] + dest = sys.argv[2] + files_to_move = read_files_to_sync() + + sync_files(source, dest, files_to_move) + +if __name__ == "__main__": + main() diff --git a/testing.md b/testing.md new file mode 100644 index 00000000..cbf7b0d7 --- /dev/null +++ b/testing.md @@ -0,0 +1,146 @@ +## Testing Your Implementation + +### Running Tests + +This project uses pytest for testing. 
Tests are organized by task: + +```bash +# Run all tests for a specific task +pytest -m task2_1 # Tensor data and indexing +pytest -m task2_2 # Tensor broadcasting +pytest -m task2_3 # Tensor operations +pytest -m task2_4 # Tensor autodifferentiation + +# Run all tests +pytest + +# Run tests with verbose output +pytest -v + +# Run a specific test file +pytest tests/test_tensor_data.py # Tensor data structure tests +pytest tests/test_tensor.py # Tensor operations and autodiff tests +pytest tests/test_operators.py # Basic operators (from Module 0) +pytest tests/test_module.py # Module system tests (from Module 0) +pytest tests/test_scalar.py # Scalar tests (from Module 1) +pytest tests/test_autodiff.py # Autodiff tests (from Module 1) + +# Run a specific test function +pytest tests/test_tensor_data.py::test_index_to_position +pytest tests/test_tensor.py::test_tensor_sum +``` + +### Module 2 Specific Tests + +**Task 2.1 - Tensor Data:** +- Tests tensor indexing and storage management +- Verifies stride calculations and memory layout +- Checks permutation operations +- Tests `index_to_position` and `to_index` functions + +**Task 2.2 - Tensor Broadcasting:** +- Tests broadcasting rules for different tensor shapes +- Verifies `shape_broadcast` and `broadcast_index` functions +- Checks edge cases with dimension alignment +- Tests operations between tensors of different sizes + +**Task 2.3 - Tensor Operations:** +- Tests high-level tensor operations (map, zip, reduce) +- Verifies mathematical functions (add, mul, sigmoid, relu, etc.) 
+- Checks tensor creation and manipulation +- Tests tensor properties and methods + +**Task 2.4 - Tensor Autodifferentiation:** +- Tests gradient computation through tensor operations +- Verifies backpropagation with broadcasting +- Checks gradient accumulation and chain rule +- Tests complex computational graphs with tensors + +### Style and Code Quality Checks + +This project enforces code style and quality using several tools: + +```bash +# Run all pre-commit hooks (recommended) +pre-commit run --all-files + +# Individual style checks: +ruff check . # Linting (style, imports, docstrings) +ruff format . # Code formatting +pyright . # Type checking +``` + +### Understanding Test Output + +**Property Testing with Hypothesis:** +- Tests use hypothesis to generate random tensor shapes and values +- If a test fails, Hypothesis will show you the minimal failing example +- This helps you understand edge cases in your tensor implementation + +**Common Test Failures:** +- `AssertionError`: Your function returned an unexpected tensor or gradient +- `TypeError`: Missing or incorrect type annotations +- `ImportError`: Function not implemented or incorrectly named +- `AttributeError`: Missing methods in tensor classes +- `IndexingError`: Issues with tensor indexing or broadcasting + +**Gradient Testing:** +- Many tests compare your computed gradients against numerical approximations +- Small differences (< 1e-5) are usually acceptable due to floating point precision +- Large differences indicate errors in your derivative implementations + +**Broadcasting Errors:** +- Tests will check that tensors with incompatible shapes raise appropriate errors +- Verify that your broadcasting functions handle edge cases correctly + +### Task 2.5 - Training + +**Training Script:** +```bash +# Run tensor-based training +python project/run_tensor.py +``` + +**Expected Output:** +- Should train faster than scalar implementation +- Record time per epoch for performance comparison +- Train on all 
datasets: Simple, Diag, Split, Xor + +### Pre-commit Hooks (Automatic Style Checking) + +The project uses pre-commit hooks that run automatically before each commit: + +```bash +# Install pre-commit hooks (one-time setup) +pre-commit install + +# Now style checks run automatically on every commit +git commit -m "your message" # Will run style checks first +``` + +### GitHub Classroom Autograder + +The autograder runs the same tests and style checks: + +1. **Style Check (10 points)**: All pre-commit hooks must pass +2. **Task 2.1 (15 points)**: Tensor data and indexing implementation +3. **Task 2.2 (15 points)**: Tensor broadcasting implementation +4. **Task 2.3 (15 points)**: Tensor operations implementation +5. **Task 2.4 (15 points)**: Tensor autodifferentiation implementation +6. **Task 2.5 (30 points)**: Training and performance verification + +### Debugging Tools + +**Interactive Debugging:** +```bash +# Launch tensor visualization app +streamlit run project/app.py -- 2 + +# Test specific tensor operations +python -c "from minitorch import tensor; t = tensor([1,2,3]); print(t)" +``` + +**Performance Testing:** +- Compare training times between scalar and tensor implementations +- Verify that tensor operations are significantly faster +- Monitor memory usage with larger tensor operations \ No newline at end of file diff --git a/tests/tensor_strategies.py b/tests/tensor_strategies.py index e3186b86..edd3d152 100644 --- a/tests/tensor_strategies.py +++ b/tests/tensor_strategies.py @@ -106,7 +106,6 @@ def matmul_tensors( allow_nan=False, min_value=-100, max_value=100 ), ) -> List[Tensor]: - i, j, k = [draw(integers(min_value=1, max_value=10)) for _ in range(3)] l1 = (i, j) From 45580438476aecc0fdca2b225cf22c1937056294 Mon Sep 17 00:00:00 2001 From: Rkeramati Date: Sun, 28 Sep 2025 19:55:33 -0400 Subject: [PATCH 2/3] skipping pre commit check --- minitorch/autodiff.py | 20 +++++----- minitorch/module.py | 24 ++++++------ minitorch/operators.py | 59 
+++++++++++++++-------------- minitorch/scalar.py | 31 ++++++++++------ minitorch/tensor.py | 70 +++++++++++++++++++---------------- minitorch/tensor_data.py | 33 +++++++++-------- minitorch/tensor_functions.py | 22 ++++++----- minitorch/tensor_ops.py | 42 +++++++++++++-------- 8 files changed, 169 insertions(+), 132 deletions(-) diff --git a/minitorch/autodiff.py b/minitorch/autodiff.py index 7d526be8..f7fa3b36 100644 --- a/minitorch/autodiff.py +++ b/minitorch/autodiff.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Iterable, Tuple +from typing import Any, Iterable, List, Tuple from typing_extensions import Protocol @@ -8,7 +8,8 @@ def central_difference(f: Any, *vals: Any, arg: int = 0, epsilon: float = 1e-6) -> Any: - r"""Computes an approximation to the derivative of `f` with respect to one arg. + r""" + Computes an approximation to the derivative of `f` with respect to one arg. See :doc:`derivative` or https://en.wikipedia.org/wiki/Finite_difference for more details. @@ -20,7 +21,6 @@ def central_difference(f: Any, *vals: Any, arg: int = 0, epsilon: float = 1e-6) Returns: An approximation of $f'_i(x_0, \ldots, x_{n-1})$ - """ raise NotImplementedError("Need to include this file from past assignment.") @@ -51,20 +51,21 @@ def chain_rule(self, d_output: Any) -> Iterable[Tuple["Variable", Any]]: def topological_sort(variable: Variable) -> Iterable[Variable]: - """Computes the topological order of the computation graph. + """ + Computes the topological order of the computation graph. Args: variable: The right-most variable Returns: Non-constant Variables in topological order starting from the right. - """ raise NotImplementedError("Need to include this file from past assignment.") def backpropagate(variable: Variable, deriv: Any) -> None: - """Runs backpropagation on the computation graph in order to + """ + Runs backpropagation on the computation graph in order to compute derivatives for the leave nodes. 
Args: @@ -72,20 +73,21 @@ def backpropagate(variable: Variable, deriv: Any) -> None: deriv : Its derivative that we want to propagate backward to the leaves. No return. Should write to its results to the derivative values of each leaf through `accumulate_derivative`. - """ raise NotImplementedError("Need to include this file from past assignment.") @dataclass class Context: - """Context class is used by `Function` to store information during the forward pass.""" + """ + Context class is used by `Function` to store information during the forward pass. + """ no_grad: bool = False saved_values: Tuple[Any, ...] = () def save_for_backward(self, *values: Any) -> None: - """Store the given `values` if they need to be used during backpropagation.""" + "Store the given `values` if they need to be used during backpropagation." if self.no_grad: return self.saved_values = values diff --git a/minitorch/module.py b/minitorch/module.py index 01b6aac1..11fc1f39 100644 --- a/minitorch/module.py +++ b/minitorch/module.py @@ -4,7 +4,8 @@ class Module: - """Modules form a tree that store parameters and other + """ + Modules form a tree that store parameters and other submodules. They make up the basis of neural network stacks. Attributes: @@ -24,34 +25,35 @@ def __init__(self) -> None: self.training = True def modules(self) -> Sequence[Module]: - """Return the direct child modules of this module.""" + "Return the direct child modules of this module." m: Dict[str, Module] = self.__dict__["_modules"] return list(m.values()) def train(self) -> None: - """Set the mode of this module and all descendent modules to `train`.""" + "Set the mode of this module and all descendent modules to `train`." raise NotImplementedError("Need to include this file from past assignment.") def eval(self) -> None: - """Set the mode of this module and all descendent modules to `eval`.""" + "Set the mode of this module and all descendent modules to `eval`." 
raise NotImplementedError("Need to include this file from past assignment.") def named_parameters(self) -> Sequence[Tuple[str, Parameter]]: - """Collect all the parameters of this module and its descendents. + """ + Collect all the parameters of this module and its descendents. Returns: The name and `Parameter` of each ancestor parameter. - """ raise NotImplementedError("Need to include this file from past assignment.") def parameters(self) -> Sequence[Parameter]: - """Enumerate over all the parameters of this module and its descendents.""" + "Enumerate over all the parameters of this module and its descendents." raise NotImplementedError("Need to include this file from past assignment.") def add_parameter(self, k: str, v: Any) -> Parameter: - """Manually add a parameter. Useful helper for scalar parameters. + """ + Manually add a parameter. Useful helper for scalar parameters. Args: k: Local name of the parameter. @@ -59,7 +61,6 @@ def add_parameter(self, k: str, v: Any) -> Parameter: Returns: Newly created parameter. - """ val = Parameter(v, k) self.__dict__["_parameters"][k] = val @@ -113,7 +114,8 @@ def _addindent(s_: str, numSpaces: int) -> str: class Parameter: - """A Parameter is a special container stored in a `Module`. + """ + A Parameter is a special container stored in a `Module`. It is designed to hold a `Variable`, but we allow it to hold any value for testing. @@ -128,7 +130,7 @@ def __init__(self, x: Any, name: Optional[str] = None) -> None: self.value.name = self.name def update(self, x: Any) -> None: - """Update the parameter value.""" + "Update the parameter value." 
self.value = x if hasattr(x, "requires_grad_"): self.value.requires_grad_(True) diff --git a/minitorch/operators.py b/minitorch/operators.py index ef455ba0..895ae82d 100644 --- a/minitorch/operators.py +++ b/minitorch/operators.py @@ -1,4 +1,6 @@ -"""Collection of the core mathematical operators used throughout the code base.""" +""" +Collection of the core mathematical operators used throughout the code base. +""" import math from typing import Callable, Iterable @@ -9,47 +11,48 @@ def mul(x: float, y: float) -> float: - """$f(x, y) = x * y$""" + "$f(x, y) = x * y$" raise NotImplementedError("Need to include this file from past assignment.") def id(x: float) -> float: - """$f(x) = x$""" + "$f(x) = x$" raise NotImplementedError("Need to include this file from past assignment.") def add(x: float, y: float) -> float: - """$f(x, y) = x + y$""" + "$f(x, y) = x + y$" raise NotImplementedError("Need to include this file from past assignment.") def neg(x: float) -> float: - """$f(x) = -x$""" + "$f(x) = -x$" raise NotImplementedError("Need to include this file from past assignment.") def lt(x: float, y: float) -> float: - """$f(x) =$ 1.0 if x is less than y else 0.0""" + "$f(x) =$ 1.0 if x is less than y else 0.0" raise NotImplementedError("Need to include this file from past assignment.") def eq(x: float, y: float) -> float: - """$f(x) =$ 1.0 if x is equal to y else 0.0""" + "$f(x) =$ 1.0 if x is equal to y else 0.0" raise NotImplementedError("Need to include this file from past assignment.") def max(x: float, y: float) -> float: - """$f(x) =$ x if x is greater than y else y""" + "$f(x) =$ x if x is greater than y else y" raise NotImplementedError("Need to include this file from past assignment.") def is_close(x: float, y: float) -> float: - """$f(x) = |x - y| < 1e-2$""" + "$f(x) = |x - y| < 1e-2$" raise NotImplementedError("Need to include this file from past assignment.") def sigmoid(x: float) -> float: - r"""$f(x) = \frac{1.0}{(1.0 + e^{-x})}$ + r""" + $f(x) = 
\frac{1.0}{(1.0 + e^{-x})}$ (See https://en.wikipedia.org/wiki/Sigmoid_function ) @@ -63,7 +66,8 @@ def sigmoid(x: float) -> float: def relu(x: float) -> float: - """$f(x) =$ x if x is greater than 0, else 0 + """ + $f(x) =$ x if x is greater than 0, else 0 (See https://en.wikipedia.org/wiki/Rectifier_(neural_networks) .) """ @@ -74,32 +78,32 @@ def relu(x: float) -> float: def log(x: float) -> float: - """$f(x) = log(x)$""" + "$f(x) = log(x)$" return math.log(x + EPS) def exp(x: float) -> float: - """$f(x) = e^{x}$""" + "$f(x) = e^{x}$" return math.exp(x) def log_back(x: float, d: float) -> float: - r"""If $f = log$ as above, compute $d \times f'(x)$""" + r"If $f = log$ as above, compute $d \times f'(x)$" raise NotImplementedError("Need to include this file from past assignment.") def inv(x: float) -> float: - """$f(x) = 1/x$""" + "$f(x) = 1/x$" raise NotImplementedError("Need to include this file from past assignment.") def inv_back(x: float, d: float) -> float: - r"""If $f(x) = 1/x$ compute $d \times f'(x)$""" + r"If $f(x) = 1/x$ compute $d \times f'(x)$" raise NotImplementedError("Need to include this file from past assignment.") def relu_back(x: float, d: float) -> float: - r"""If $f = relu$ compute $d \times f'(x)$""" + r"If $f = relu$ compute $d \times f'(x)$" raise NotImplementedError("Need to include this file from past assignment.") @@ -109,7 +113,8 @@ def relu_back(x: float, d: float) -> float: def map(fn: Callable[[float], float]) -> Callable[[Iterable[float]], Iterable[float]]: - """Higher-order map. + """ + Higher-order map. 
See https://en.wikipedia.org/wiki/Map_(higher-order_function) @@ -119,20 +124,20 @@ def map(fn: Callable[[float], float]) -> Callable[[Iterable[float]], Iterable[fl Returns: A function that takes a list, applies `fn` to each element, and returns a new list - """ raise NotImplementedError("Need to include this file from past assignment.") def negList(ls: Iterable[float]) -> Iterable[float]: - """Use `map` and `neg` to negate each element in `ls`""" + "Use `map` and `neg` to negate each element in `ls`" raise NotImplementedError("Need to include this file from past assignment.") def zipWith( - fn: Callable[[float, float], float], + fn: Callable[[float, float], float] ) -> Callable[[Iterable[float], Iterable[float]], Iterable[float]]: - """Higher-order zipwith (or map2). + """ + Higher-order zipwith (or map2). See https://en.wikipedia.org/wiki/Map_(higher-order_function) @@ -148,14 +153,15 @@ def zipWith( def addLists(ls1: Iterable[float], ls2: Iterable[float]) -> Iterable[float]: - """Add the elements of `ls1` and `ls2` using `zipWith` and `add`""" + "Add the elements of `ls1` and `ls2` using `zipWith` and `add`" raise NotImplementedError("Need to include this file from past assignment.") def reduce( fn: Callable[[float, float], float], start: float ) -> Callable[[Iterable[float]], float]: - r"""Higher-order reduce. + r""" + Higher-order reduce. Args: fn: combine two values @@ -165,16 +171,15 @@ def reduce( Function that takes a list `ls` of elements $x_1 \ldots x_n$ and computes the reduction :math:`fn(x_3, fn(x_2, fn(x_1, x_0)))` - """ raise NotImplementedError("Need to include this file from past assignment.") def sum(ls: Iterable[float]) -> float: - """Sum up a list using `reduce` and `add`.""" + "Sum up a list using `reduce` and `add`." raise NotImplementedError("Need to include this file from past assignment.") def prod(ls: Iterable[float]) -> float: - """Product of a list using `reduce` and `mul`.""" + "Product of a list using `reduce` and `mul`." 
raise NotImplementedError("Need to include this file from past assignment.") diff --git a/minitorch/scalar.py b/minitorch/scalar.py index 7d1e9ca0..3c853a2e 100644 --- a/minitorch/scalar.py +++ b/minitorch/scalar.py @@ -7,9 +7,17 @@ from .autodiff import Context, Variable, backpropagate, central_difference from .scalar_functions import ( + EQ, + LT, + Add, + Exp, Inv, + Log, Mul, + Neg, + ReLU, ScalarFunction, + Sigmoid, ) ScalarLike = Union[float, int, "Scalar"] @@ -17,7 +25,8 @@ @dataclass class ScalarHistory: - """`ScalarHistory` stores the history of `Function` operations that was + """ + `ScalarHistory` stores the history of `Function` operations that was used to construct the current Variable. Attributes: @@ -39,7 +48,8 @@ class ScalarHistory: class Scalar: - """A reimplementation of scalar values for autodifferentiation + """ + A reimplementation of scalar values for autodifferentiation tracking. Scalar Variables behave as close as possible to standard Python numbers while also tracking the operations that led to the number's creation. They can only be manipulated by @@ -123,12 +133,12 @@ def relu(self) -> Scalar: # Variable elements for backprop def accumulate_derivative(self, x: Any) -> None: - """Add `val` to the the derivative accumulated on this variable. + """ + Add `val` to the the derivative accumulated on this variable. Should only be called during autodifferentiation on leaf variables. Args: x: value to be accumulated - """ assert self.is_leaf(), "Only leaf variables can have derivatives." 
if self.derivative is None: @@ -136,7 +146,7 @@ def accumulate_derivative(self, x: Any) -> None: self.derivative += x def is_leaf(self) -> bool: - """True if this variable created by the user (no `last_fn`)""" + "True if this variable created by the user (no `last_fn`)" return self.history is not None and self.history.last_fn is None def is_constant(self) -> bool: @@ -156,12 +166,12 @@ def chain_rule(self, d_output: Any) -> Iterable[Tuple[Variable, Any]]: raise NotImplementedError("Need to include this file from past assignment.") def backward(self, d_output: Optional[float] = None) -> None: - """Calls autodiff to fill in the derivatives for the history of this object. + """ + Calls autodiff to fill in the derivatives for the history of this object. Args: d_output (number, opt): starting derivative to backpropagate through the model (typically left out, and assumed to be 1.0). - """ if d_output is None: d_output = 1.0 @@ -169,14 +179,13 @@ def backward(self, d_output: Optional[float] = None) -> None: def derivative_check(f: Any, *scalars: Scalar) -> None: - """Checks that autodiff works on a python function. + """ + Checks that autodiff works on a python function. Asserts False if derivative is incorrect. - Parameters - ---------- + Parameters: f : function from n-scalars to 1-scalar. *scalars : n input scalar values. - """ out = f(*scalars) out.backward() diff --git a/minitorch/tensor.py b/minitorch/tensor.py index 05179917..c675699b 100644 --- a/minitorch/tensor.py +++ b/minitorch/tensor.py @@ -1,4 +1,6 @@ -"""Implementation of the core Tensor object for autodifferentiation.""" +""" +Implementation of the core Tensor object for autodifferentiation. +""" from __future__ import annotations @@ -45,7 +47,8 @@ @dataclass class History: - """`History` stores the history of `Function` operations that was + """ + `History` stores the history of `Function` operations that was used to construct the current Variable. 
""" @@ -58,7 +61,8 @@ class History: class Tensor: - """Tensor is a generalization of Scalar in that it is a Variable that + """ + Tensor is a generalization of Scalar in that it is a Variable that handles multidimensional arrays. """ @@ -99,39 +103,39 @@ def requires_grad(self) -> bool: return self.history is not None def to_numpy(self) -> npt.NDArray[np.float64]: - """Returns: - Converted to numpy array - + """ + Returns: + Converted to numpy array """ return self.contiguous()._tensor._storage.reshape(self.shape) # Properties @property def shape(self) -> UserShape: - """Returns: - shape of the tensor - + """ + Returns: + shape of the tensor """ return self._tensor.shape @property def size(self) -> int: - """Returns: - int : size of the tensor - + """ + Returns: + int : size of the tensor """ return self._tensor.size @property def dims(self) -> int: - """Returns: - int : dimensionality of the tensor - + """ + Returns: + int : dimensionality of the tensor """ return self._tensor.dims def _ensure_tensor(self, b: TensorLike) -> Tensor: - """Turns a python number into a tensor with the same backend.""" + "Turns a python number into a tensor with the same backend." 
if isinstance(b, (int, float)): c = Tensor.make([b], (1,), backend=self.backend) else: @@ -156,7 +160,7 @@ def __rtruediv__(self, b: TensorLike) -> Tensor: return Mul.apply(self._ensure_tensor(b), Inv.apply(self)) def __matmul__(self, b: Tensor) -> Tensor: - """Not used until Module 3""" + "Not used until Module 3" return MatMul.apply(self, b) def __lt__(self, b: TensorLike) -> Tensor: @@ -203,29 +207,29 @@ def item(self) -> float: return self[0] def sum(self, dim: Optional[int] = None) -> Tensor: - """Compute the sum over dimension `dim`""" + "Compute the sum over dimension `dim`" if dim is None: return Sum.apply(self.contiguous().view(self.size), self._ensure_tensor(0)) else: return Sum.apply(self, self._ensure_tensor(dim)) def mean(self, dim: Optional[int] = None) -> Tensor: - """Compute the mean over dimension `dim`""" + "Compute the mean over dimension `dim`" if dim is not None: return self.sum(dim) / self.shape[dim] else: return self.sum() / self.size def permute(self, *order: int) -> Tensor: - """Permute tensor dimensions to *order""" + "Permute tensor dimensions to *order" return Permute.apply(self, tensor(list(order))) def view(self, *shape: int) -> Tensor: - """Change the shape of the tensor to a new shape with the same size""" + "Change the shape of the tensor to a new shape with the same size" return View.apply(self, tensor(list(shape))) def contiguous(self) -> Tensor: - """Return a contiguous tensor with the same data""" + "Return a contiguous tensor with the same data" return Copy.apply(self) def __repr__(self) -> str: @@ -255,24 +259,24 @@ def make( strides: Optional[UserStrides] = None, backend: Optional[TensorBackend] = None, ) -> Tensor: - """Create a new tensor from data""" + "Create a new tensor from data" return Tensor(TensorData(storage, shape, strides), backend=backend) def expand(self, other: Tensor) -> Tensor: - """Method used to allow for backprop over broadcasting. + """ + Method used to allow for backprop over broadcasting. 
This method is called when the output of `backward` is a different size than the input of `forward`. - Parameters - ---------- + Parameters: other : backward tensor (must broadcast with self) - Returns - ------- + Returns: Expanded version of `other` with the right derivatives """ + # Case 1: Both the same shape. if self.shape == other.shape: return other @@ -317,12 +321,12 @@ def detach(self) -> Tensor: # Variable elements for backprop def accumulate_derivative(self, x: Any) -> None: - """Add `val` to the the derivative accumulated on this variable. + """ + Add `val` to the the derivative accumulated on this variable. Should only be called during autodifferentiation on leaf variables. Args: x : value to be accumulated - """ assert self.is_leaf(), "Only leaf variables can have derivatives." if self.grad is None: @@ -332,7 +336,7 @@ def accumulate_derivative(self, x: Any) -> None: self.grad += x def is_leaf(self) -> bool: - """True if this variable created by the user (no `last_fn`)""" + "True if this variable created by the user (no `last_fn`)" return self.history is not None and self.history.last_fn is None def is_constant(self) -> bool: @@ -363,5 +367,7 @@ def backward(self, grad_output: Optional[Tensor] = None) -> None: backpropagate(self, grad_output) def zero_grad_(self) -> None: # pragma: no cover - """Reset the derivative on this variable.""" + """ + Reset the derivative on this variable. + """ self.grad = None diff --git a/minitorch/tensor_data.py b/minitorch/tensor_data.py index 22b42400..452b7904 100644 --- a/minitorch/tensor_data.py +++ b/minitorch/tensor_data.py @@ -15,8 +15,7 @@ class IndexingError(RuntimeError): - """Exception raised for indexing errors.""" - + "Exception raised for indexing errors." 
pass @@ -32,7 +31,8 @@ class IndexingError(RuntimeError): def index_to_position(index: Index, strides: Strides) -> int: - """Converts a multidimensional tensor `index` into a single-dimensional position in + """ + Converts a multidimensional tensor `index` into a single-dimensional position in storage based on strides. Args: @@ -41,14 +41,15 @@ def index_to_position(index: Index, strides: Strides) -> int: Returns: Position in storage - """ + # TODO: Implement for Task 2.1. raise NotImplementedError("Need to implement for Task 2.1") def to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None: - """Convert an `ordinal` to an index in the `shape`. + """ + Convert an `ordinal` to an index in the `shape`. Should ensure that enumerating position 0 ... size of a tensor produces every index exactly once. It may not be the inverse of `index_to_position`. @@ -66,7 +67,8 @@ def to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None: def broadcast_index( big_index: Index, big_shape: Shape, shape: Shape, out_index: OutIndex ) -> None: - """Convert a `big_index` into `big_shape` to a smaller `out_index` + """ + Convert a `big_index` into `big_shape` to a smaller `out_index` into `shape` following broadcasting rules. In this case it may be larger or with more dimensions than the `shape` given. Additional dimensions may need to be mapped to 0 or @@ -80,14 +82,14 @@ def broadcast_index( Returns: None - """ # TODO: Implement for Task 2.2. raise NotImplementedError("Need to implement for Task 2.2") def shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape: - """Broadcast two shapes to create a new union shape. + """ + Broadcast two shapes to create a new union shape. Args: shape1 : first shape @@ -98,7 +100,6 @@ def shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape: Raises: IndexingError : if cannot broadcast - """ # TODO: Implement for Task 2.2. 
raise NotImplementedError("Need to implement for Task 2.2") @@ -152,11 +153,11 @@ def to_cuda_(self) -> None: # pragma: no cover self._storage = numba.cuda.to_device(self._storage) def is_contiguous(self) -> bool: - """Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions. + """ + Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions. Returns: bool : True if contiguous - """ last = 1e9 for stride in self._strides: @@ -208,18 +209,18 @@ def tuple(self) -> Tuple[Storage, Shape, Strides]: return (self._storage, self._shape, self._strides) def permute(self, *order: int) -> TensorData: - """Permute the dimensions of the tensor. + """ + Permute the dimensions of the tensor. Args: order (list): a permutation of the dimensions Returns: New `TensorData` with the same storage and a new dimension order. - """ - assert list(sorted(order)) == list(range(len(self.shape))), ( - f"Must give a position to each dimension. Shape: {self.shape} Order: {order}" - ) + assert list(sorted(order)) == list( + range(len(self.shape)) + ), f"Must give a position to each dimension. Shape: {self.shape} Order: {order}" # TODO: Implement for Task 2.1. raise NotImplementedError("Need to implement for Task 2.1") diff --git a/minitorch/tensor_functions.py b/minitorch/tensor_functions.py index 3c63b0df..86db01a1 100644 --- a/minitorch/tensor_functions.py +++ b/minitorch/tensor_functions.py @@ -1,4 +1,6 @@ -"""Implementation of the autodifferentiation Functions for Tensor.""" +""" +Implementation of the autodifferentiation Functions for Tensor. 
+""" from __future__ import annotations @@ -21,7 +23,7 @@ def wrap_tuple(x): # type: ignore - """Turn a possible value into a tuple""" + "Turn a possible value into a tuple" if isinstance(x, tuple): return x return (x,) @@ -272,7 +274,8 @@ def transpose(a: Tensor) -> Tensor: # Helpers for Constructing tensors def zeros(shape: UserShape, backend: TensorBackend = SimpleBackend) -> Tensor: - """Produce a zero tensor of size `shape`. + """ + Produce a zero tensor of size `shape`. Args: shape : shape of tensor @@ -280,7 +283,6 @@ def zeros(shape: UserShape, backend: TensorBackend = SimpleBackend) -> Tensor: Returns: new tensor - """ return minitorch.Tensor.make( [0] * int(operators.prod(shape)), shape, backend=backend @@ -292,7 +294,8 @@ def rand( backend: TensorBackend = SimpleBackend, requires_grad: bool = False, ) -> Tensor: - """Produce a random tensor of size `shape`. + """ + Produce a random tensor of size `shape`. Args: shape : shape of tensor @@ -301,7 +304,6 @@ def rand( Returns: :class:`Tensor` : new tensor - """ vals = [random.random() for _ in range(int(operators.prod(shape)))] tensor = minitorch.Tensor.make(vals, shape, backend=backend) @@ -315,7 +317,8 @@ def _tensor( backend: TensorBackend = SimpleBackend, requires_grad: bool = False, ) -> Tensor: - """Produce a tensor with data ls and shape `shape`. + """ + Produce a tensor with data ls and shape `shape`. 
Args: ls: data for tensor @@ -325,7 +328,6 @@ def _tensor( Returns: new tensor - """ tensor = minitorch.Tensor.make(ls, shape, backend=backend) tensor.requires_grad_(requires_grad) @@ -335,7 +337,8 @@ def _tensor( def tensor( ls: Any, backend: TensorBackend = SimpleBackend, requires_grad: bool = False ) -> Tensor: - """Produce a tensor with data and shape from ls + """ + Produce a tensor with data and shape from ls Args: ls: data for tensor @@ -344,7 +347,6 @@ def tensor( Returns: :class:`Tensor` : new tensor - """ def shape(ls: Any) -> List[int]: diff --git a/minitorch/tensor_ops.py b/minitorch/tensor_ops.py index 9683d144..96411b42 100644 --- a/minitorch/tensor_ops.py +++ b/minitorch/tensor_ops.py @@ -2,20 +2,26 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Type +import numpy as np from typing_extensions import Protocol from . import operators from .tensor_data import ( + MAX_DIMS, + broadcast_index, + index_to_position, shape_broadcast, + to_index, ) if TYPE_CHECKING: from .tensor import Tensor - from .tensor_data import Shape, Storage, Strides + from .tensor_data import Index, Shape, Storage, Strides class MapProto(Protocol): - def __call__(self, x: Tensor, out: Optional[Tensor] = ..., /) -> Tensor: ... + def __call__(self, x: Tensor, out: Optional[Tensor] = ..., /) -> Tensor: + ... class TensorOps: @@ -46,7 +52,8 @@ def matrix_multiply(a: Tensor, b: Tensor) -> Tensor: class TensorBackend: def __init__(self, ops: Type[TensorOps]): - """Dynamically construct a tensor backend based on a `tensor_ops` object + """ + Dynamically construct a tensor backend based on a `tensor_ops` object that implements map, zip, and reduce higher-order functions. 
Args: @@ -57,6 +64,7 @@ def __init__(self, ops: Type[TensorOps]): A collection of tensor functions """ + # Maps self.neg_map = ops.map(operators.neg) self.sigmoid_map = ops.map(operators.sigmoid) @@ -87,7 +95,8 @@ def __init__(self, ops: Type[TensorOps]): class SimpleOps(TensorOps): @staticmethod def map(fn: Callable[[float], float]) -> MapProto: - """Higher-order tensor map function :: + """ + Higher-order tensor map function :: fn_map = map(fn) fn_map(a, out) @@ -113,8 +122,8 @@ def map(fn: Callable[[float], float]) -> MapProto: Returns: new tensor data - """ + f = tensor_map(fn) def ret(a: Tensor, out: Optional[Tensor] = None) -> Tensor: @@ -127,9 +136,10 @@ def ret(a: Tensor, out: Optional[Tensor] = None) -> Tensor: @staticmethod def zip( - fn: Callable[[float, float], float], + fn: Callable[[float, float], float] ) -> Callable[["Tensor", "Tensor"], "Tensor"]: - """Higher-order tensor zip function :: + """ + Higher-order tensor zip function :: fn_zip = zip(fn) out = fn_zip(a, b) @@ -154,8 +164,8 @@ def zip( Returns: :class:`TensorData` : new tensor data - """ + f = tensor_zip(fn) def ret(a: "Tensor", b: "Tensor") -> "Tensor": @@ -173,7 +183,8 @@ def ret(a: "Tensor", b: "Tensor") -> "Tensor": def reduce( fn: Callable[[float, float], float], start: float = 0.0 ) -> Callable[["Tensor", int], "Tensor"]: - """Higher-order tensor reduce function. :: + """ + Higher-order tensor reduce function. :: fn_reduce = reduce(fn) out = fn_reduce(a, dim) @@ -193,7 +204,6 @@ def reduce( Returns: :class:`TensorData` : new tensor - """ f = tensor_reduce(fn) @@ -221,7 +231,8 @@ def matrix_multiply(a: "Tensor", b: "Tensor") -> "Tensor": def tensor_map(fn: Callable[[float], float]) -> Any: - """Low-level implementation of tensor map between + """ + Low-level implementation of tensor map between tensors with *possibly different strides*. 
Simple version: @@ -247,7 +258,6 @@ def tensor_map(fn: Callable[[float], float]) -> Any: Returns: None : Fills in `out` - """ def _map( @@ -265,7 +275,8 @@ def _map( def tensor_zip(fn: Callable[[float, float], float]) -> Any: - """Low-level implementation of tensor zip between + """ + Low-level implementation of tensor zip between tensors with *possibly different strides*. Simple version: @@ -294,7 +305,6 @@ def tensor_zip(fn: Callable[[float, float], float]) -> Any: Returns: None : Fills in `out` - """ def _zip( @@ -315,7 +325,8 @@ def _zip( def tensor_reduce(fn: Callable[[float, float], float]) -> Any: - """Low-level implementation of tensor reduce. + """ + Low-level implementation of tensor reduce. * `out_shape` will be the same as `a_shape` except with `reduce_dim` turned to size `1` @@ -332,7 +343,6 @@ def tensor_reduce(fn: Callable[[float, float], float]) -> Any: Returns: None : Fills in `out` - """ def _reduce( From 79a1c226ffe62234662a896a71dc9ddd3c90bca0 Mon Sep 17 00:00:00 2001 From: Rkeramati Date: Sun, 28 Sep 2025 20:00:18 -0400 Subject: [PATCH 3/3] upgraded documentation --- .gitignore | 3 +- Module2.md | 172 ----------------------------------- module2-modernization.md | 191 --------------------------------------- 3 files changed, 2 insertions(+), 364 deletions(-) delete mode 100644 Module2.md delete mode 100644 module2-modernization.md diff --git a/.gitignore b/.gitignore index 9f521073..0230ad9c 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,5 @@ dmypy.json .pyre/ *.\#* data/ -pyodide \ No newline at end of file +pyodideModule2.md +module2-modernization.md diff --git a/Module2.md b/Module2.md deleted file mode 100644 index acc6889e..00000000 --- a/Module2.md +++ /dev/null @@ -1,172 +0,0 @@ -# Module-2 Assignment Summary - -## Overview -Module-2 introduces **Tensors** - multidimensional arrays that extend and optimize the scalar autodifferentiation system from Module-1. 
While Module-1's scalar system is correct, it's inefficient due to Python overhead from individual scalar objects and operations. Tensors solve this by grouping operations together and enabling faster implementations. - -## Learning Objectives -- Build efficient tensor data structures with proper indexing and memory layout -- Implement tensor operations (map, zip, reduce) for element-wise and reduction operations -- Extend autodifferentiation to work with tensors and broadcasting -- Create tensor-based neural networks that outperform scalar implementations -- Understand memory optimization through strides, views, and broadcasting - -## Problem Statement -The scalar system from Module-1 has performance issues: -- Every scalar requires building an object -- Each operation stores a complete computation graph -- Training requires repeated operations with Python overhead -- Models like linear regression need inefficient for loops - -**Solution**: Tensors group repeated operations to save Python overhead and delegate to faster implementations. - -## Core Architecture - -### Key Files -- **tensor.py** - User-facing Tensor interface (similar to scalar.py) -- **tensor_data.py** - Core indexing, strides, storage management -- **tensor_ops.py** - Higher-order tensor operations (map, zip, reduce) -- **tensor_functions.py** - Autodifferentiation-ready tensor functions - -### Supporting Files -- **operators.py** - Mathematical operators (inherited from Module-1) -- **autodiff.py** - Autodifferentiation framework (inherited from Module-1) - -## Detailed Task Breakdown - -### Task 2.1: Tensor Data - Indexing -**File**: `minitorch/tensor_data.py` -**Objective**: Implement core tensor backend (`TensorData`) for indexing and storage - -**Functions to Implement**: -1. **`index_to_position(index: Index, strides: Strides) -> int`** - - Converts multidimensional tensor index to single storage position - - Uses strides to calculate memory position - -2. 
**`to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None`** - - Converts ordinal position (0...size-1) to multidimensional index - - Ensures enumeration produces every index exactly once - - May not be inverse of `index_to_position` - -3. **`TensorData.permute(*order: int) -> TensorData`** - - Permutes tensor dimensions - - Returns new TensorData with same storage, new dimension order - -**Key Concepts**: -- **Storage**: Flat 1D array containing tensor data -- **Shape**: Dimensions of tensor (e.g., (3, 4, 5)) -- **Strides**: Memory navigation pattern (how many positions to skip per dimension) - -### Task 2.2: Tensor Broadcasting -**File**: `minitorch/tensor_data.py` -**Objective**: Implement broadcasting for operations between differently-shaped tensors - -**Functions to Implement**: -1. **`shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape`** - - Creates union shape from two shapes following broadcasting rules - - Raises `IndexingError` if shapes cannot broadcast - -2. **`broadcast_index(big_index, big_shape, shape, out_index) -> None`** - - Converts index from larger tensor to smaller tensor - - Handles dimension mapping (may map to 0 or remove dimensions) - -**Broadcasting Rules**: -- Tensors aligned from rightmost dimension -- Dimensions of size 1 can broadcast to any size -- Missing dimensions treated as size 1 -- Example: (3, 1) + (3, 4) → (3, 4) - -### Task 2.3: Tensor Operations -**File**: `minitorch/tensor_ops.py`, `minitorch/tensor_functions.py`, `minitorch/tensor.py` -**Objective**: Implement high-level tensor operations and user interface - -**Core Operations in `tensor_ops.py`**: -1. **`tensor_map(fn) -> Callable`** - - Applies function element-wise to tensor - - Handles broadcasting between different shapes - -2. **`tensor_zip(fn) -> Callable`** - - Applies binary function element-wise to two tensors - - Supports broadcasting - -3. 
**`tensor_reduce(fn) -> Callable`** - - Reduces tensor along specified dimension - - Output shape same as input except reduced dimension becomes size 1 - -**Forward Functions in `tensor_functions.py`**: -- **Unary**: Sigmoid, ReLU, Log, Exp -- **Binary**: Mul, LT, EQ, IsClose -- **Reductions**: Sum (with dim argument) -- **Shape Operations**: Permute - -**User Interface in `tensor.py`**: -- **Properties**: size, dims -- **Operators**: add, sub, mul, lt, eq, gt, neg, radd, rmul -- **Functions**: all, is_close, sigmoid, relu, log, exp -- **Reductions**: sum, mean (with optional dim) -- **Shape Operations**: permute, view -- **Utilities**: zero_grad_ - -### Task 2.4: Gradients and Autograd -**File**: `minitorch/tensor_functions.py` -**Objective**: Implement backward functions for tensor autodifferentiation - -**Key Challenges**: -- Gradient computation through broadcasting operations -- Proper gradient aggregation when tensors are broadcast -- Maintaining computation graph for complex tensor operations - -**Similar to Module-1**: Tensors are `Variable` objects supporting autodifferentiation, but now handle multidimensional arrays efficiently. - -### Task 2.5: Training -**File**: `project/run_tensor.py` -**Objective**: Implement tensor-based neural network training - -**Requirements**: -- Three-layer neural network: 2 → Hidden (ReLU) → Hidden (ReLU) → Output (Sigmoid) -- Same functionality as `project/run_scalar.py` but using tensor operations -- Train on all datasets and record results -- Measure and report time per epoch - -## Key Technical Concepts - -### Memory Layout and Strides -- **Contiguous**: Data stored in row-major (C-style) order -- **Strides**: Define memory access pattern for each dimension -- **Views**: Different tensor shapes sharing same underlying storage - -### Tensor Operations Hierarchy -1. **Low-level**: `tensor_ops.py` (map, zip, reduce) -2. **Mid-level**: `tensor_functions.py` (mathematical functions) -3.
**High-level**: `tensor.py` (user-friendly interface) - -### Performance Benefits -- **Reduced Python Overhead**: Group operations instead of individual scalars -- **Vectorized Operations**: Delegate to optimized implementations -- **Memory Efficiency**: Shared storage through views and broadcasting -- **Graph Optimization**: Fewer nodes in computation graph - -## Testing Structure -- **task2_1**: `test_tensor_data.py` - Indexing and layout tests -- **task2_2**: `test_tensor_data.py` - Broadcasting tests -- **task2_3**: `test_tensor.py` - Function and operation tests -- **task2_4**: `test_tensor.py` - Autodifferentiation tests - -## Expected Performance Improvement -Moving from scalar to tensor implementation should provide: -- Significant speedup in training time per epoch -- Reduced memory usage through efficient storage -- Better scalability for larger models and datasets - -## Integration with Previous Modules -**Synced Files from Module-1**: -- `minitorch/operators.py` - Mathematical operators -- `minitorch/module.py` - Neural network module framework -- `minitorch/autodiff.py` - Core autodifferentiation system -- `minitorch/scalar.py` - Scalar implementation -- `project/run_manual.py` - Manual gradient checking -- `project/run_scalar.py` - Scalar-based training - -## Debugging Tools -- **Expression Visualization**: `streamlit run project/app.py -- 2` -- **Graph Builder**: View computation graphs for tensor operations -- **Tensor Debugging**: Interactive tools for understanding tensor operations \ No newline at end of file diff --git a/module2-modernization.md b/module2-modernization.md deleted file mode 100644 index 24041047..00000000 --- a/module2-modernization.md +++ /dev/null @@ -1,191 +0,0 @@ -# Module-2 Modernization Plan - -## Overview -This document outlines the step-by-step modernization of Module-2 to match the modern Python development practices implemented in Module-0 and successfully applied to Module-1. 
- -## Current State Analysis - -### Legacy Files Present -- `setup.py` - Minimal setup file (contains only `py_modules=[]`) -- `setup.cfg` - Contains tool configurations and version 0.4 -- `requirements.txt` - Legacy dependency management -- `requirements.extra.txt` - Additional dependencies -- No modern packaging or CI/CD infrastructure - -### Module-2 Specific Considerations -- **Version**: Currently 0.4, should update to 0.6 (Module-0: 0.4, Module-1: 0.5) -- **Tasks**: Only task2_* tests (task2_1, task2_2, task2_3, task2_4, task2_5) -- **Dependencies**: More extensive than Module-1 (includes streamlit, plotly, torch, etc.) -- **Inherited Tasks**: task0_*, task1_* synced from previous modules, not tested in CI - -## Modernization Steps Required - -### ✅ Step 0: Analysis and Planning (COMPLETED) -- [x] Read Module-1 modernization documentation -- [x] Analyze Module-2 current structure and dependencies -- [x] Create Module2.md assignment summary -- [x] Create this modernization plan - -### 📋 Step 1: Create Modern pyproject.toml -**Action**: Migrate from legacy setup.py + requirements.txt to modern pyproject.toml -- **Create**: `Module-2/pyproject.toml` -- **Version**: Update from 0.4 to 0.6 -- **Dependencies**: Migrate and update all packages with modern versions -- **Build system**: Use `hatchling` backend (consistent with Module-0/1) -- **Configuration**: Add Ruff, Pyright, and pytest configurations - -**Dependency Migration Strategy**: -- **Base requirements.txt**: colorama, hypothesis, mypy, numba, numpy, pre-commit, pytest, pytest-env, pytest-runner, typing_extensions -- **Extra requirements.txt**: datasets, embeddings, networkx, plotly, pydot, python-mnist, streamlit, streamlit-ace, torch, watchdog -- **Version Updates**: Follow Module-1 pattern for common dependencies -- **New Dependencies**: Preserve Module-2 specific packages (streamlit, plotly, etc.) 
- -### 📋 Step 2: Remove Legacy setup.py -**Action**: Remove minimal setup.py file -- **Remove**: `Module-2/setup.py` -- **Reason**: Replaced by pyproject.toml modern packaging - -### 📋 Step 3: Remove Legacy Requirements Files -**Action**: Remove both requirements files after verifying all dependencies migrated -- **Remove**: `Module-2/requirements.txt` -- **Remove**: `Module-2/requirements.extra.txt` -- **Verification**: Ensure all dependencies properly migrated to pyproject.toml - -### 📋 Step 4: Remove/Modernize setup.cfg -**Action**: Remove setup.cfg after migrating relevant configurations -- **Remove**: `Module-2/setup.cfg` -- **Migration**: Move any necessary configurations to pyproject.toml -- **Note**: Tool configurations (flake8, mypy, black, isort, darglint) will be replaced by modern Ruff/Pyright - -### 📋 Step 5: Create Modern Pre-commit Configuration -**Action**: Create modern pre-commit hooks to match Module-0/1 -- **Create**: `Module-2/.pre-commit-config.yaml` -- **Content**: Copy exactly from Module-1 (modern toolchain) -- **Tools**: Use Ruff (replaces Black/Flake8/isort) and Pyright (replaces mypy) - -### 📋 Step 6: Create GitHub Actions CI/CD -**Action**: Add automated testing and grading infrastructure -- **Create**: `Module-2/.github/workflows/classroom.yaml` -- **Content**: Copy from Module-1 (GitHub Classroom integration) - -### 📋 Step 7: Create Module-2 Specific Autograding -**Action**: Create autograding configuration for Module-2 tasks only -- **Create**: `Module-2/.github/classroom/autograding.json` -- **Tests**: Only task2_* tests (task2_1, task2_2, task2_3, task2_4, task2_5) + Style check -- **Reasoning**: Module-2 syncs task0_*, task1_* from previous modules, only tests its own tasks - -### 📋 Step 8: Create Documentation Files -**Action**: Create modern documentation matching Module-1 -- **Create**: `Module-2/installation.md` (copy from Module-1, generic content) -- **Create**: `Module-2/testing.md` (adapt for Module-2 specific tasks) -- 
**Update**: `Module-2/README.md` (modernize and improve) - -### 📋 Step 9: Verification and Testing -**Action**: Ensure modernized setup works correctly -- **Test**: `pip install -e ".[dev,extra]"` installation -- **Test**: `pre-commit run --all-files` functionality -- **Test**: Module-2 specific tasks (pytest -m task2_1, task2_2, etc.) -- **Test**: Import verification (`python -c "import minitorch; print('Success!')"`) - -## Module-2 Specific Dependency Analysis - -### Core Dependencies (requirements.txt) -``` -colorama==0.4.3 → 0.4.6 (update to match Module-1) -hypothesis==6.54 → 6.138.2 (update to match Module-1) -mypy==0.971 → Remove (replace with Pyright) -numba==0.56 → 0.61.2 (update to match Module-1) -numpy==1.22 → >=1.24,<2.3 (update to match Module-1) -pre-commit==2.20.0 → 4.3.0 (move to [dev] group) -pytest==7.1.2 → 8.4.1 (update to match Module-1) -pytest-env → Keep (Module-2 specific) -pytest-runner==5.2 → Remove (not needed with modern packaging) -typing_extensions → Keep (version spec from Module-1) -``` - -### Extra Dependencies (requirements.extra.txt) -``` -datasets==2.4.0 → Keep/Update (Module-2 specific) -embeddings==0.0.8 → Keep (Module-2 specific) -networkx==2.4 → Keep/Update (Module-2 specific) -plotly==4.14.3 → Keep/Update (Module-2 specific) -pydot==1.4.1 → Keep/Update (Module-2 specific) -python-mnist → Keep (Module-2 specific) -streamlit==1.12.0 → Keep/Update (Module-2 specific) -streamlit-ace → Keep (Module-2 specific) -torch → Keep torch==2.8.0 (match Module-1) -watchdog==1.0.2 → Keep/Update (Module-2 specific) -``` - -## Task Structure Analysis - -### Module-2 Test Tasks -- `task2_1`: Tensor data and indexing (test_tensor_data.py) -- `task2_2`: Tensor broadcasting (test_tensor_data.py) -- `task2_3`: Tensor operations (test_tensor.py) -- `task2_4`: Tensor autodifferentiation (test_tensor.py) -- `task2_5`: Training implementation (no specific tests, validation through training) - -### Inherited Tasks -- `task0_*`: Synced from Module-0 via 
sync_previous_module.py -- `task1_*`: Synced from Module-1 via sync_previous_module.py -- **Note**: Not tested in Module-2 CI, only task2_* tests run - -## Expected File Structure After Modernization - -``` -Module-2/ -├── pyproject.toml # ✅ NEW: Modern packaging -├── installation.md # ✅ NEW: Installation guide -├── testing.md # ✅ NEW: Testing documentation -├── module2-modernization.md # ✅ NEW: This planning document -├── Module2.md # ✅ NEW: Assignment summary -├── README.md # ✅ UPDATED: Modern documentation -├── .pre-commit-config.yaml # ✅ NEW: Modern tools -├── .github/ # ✅ NEW: CI/CD infrastructure -│ ├── workflows/ -│ │ └── classroom.yaml # ✅ NEW: GitHub Actions -│ └── classroom/ -│ └── autograding.json # ✅ NEW: Module-2 specific tests -├── setup.py # ❌ REMOVED: Legacy -├── setup.cfg # ❌ REMOVED: Legacy -├── requirements.txt # ❌ REMOVED: Legacy -└── requirements.extra.txt # ❌ REMOVED: Legacy -``` - -## Benefits Expected -- **Performance**: Ruff is significantly faster than Black+Flake8+isort -- **Consistency**: Unified toolchain matching Module-0 and Module-1 -- **Modern**: Uses current best practices for Python packaging -- **Automated**: CI/CD pipeline for continuous testing -- **Type Safety**: Better type checking with Pyright -- **Maintainability**: Single configuration file (pyproject.toml) -- **Documentation**: Clear installation and testing guides - -## Testing Commands (Post-Modernization) -```bash -# Install in development mode -pip install -e ".[dev,extra]" - -# Run pre-commit -pre-commit run --all-files - -# Test module-specific tasks -pytest -m task2_1 -pytest -m task2_2 -pytest -m task2_3 -pytest -m task2_4 - -# Verify import -python -c "import minitorch; print('Success!')" - -# Run training (Task 2.5) -python project/run_tensor.py -``` - -## Next Steps -After completing this modernization plan: -1. Execute steps 1-9 in sequence -2. Test all functionality thoroughly -3. Document any Module-2 specific issues encountered -4. 
Use this as a template for future module modernizations (Module-3, Module-4) \ No newline at end of file