diff --git a/.github/classroom/autograding.json b/.github/classroom/autograding.json new file mode 100644 index 00000000..9c124daa --- /dev/null +++ b/.github/classroom/autograding.json @@ -0,0 +1,54 @@ +{ + "tests": [ + { + "name": "Style", + "setup": "python -m venv .venv; . .venv/bin/activate; pip3 install -qe .[dev]; pip3 install -q pre-commit", + "run": ". .venv/bin/activate && pre-commit run --all", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.1", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_1", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.2", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_2", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.3", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_3", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + }, + { + "name": "Task 2.4", + "setup": "sudo -H pip3 install -qe .", + "run": "pytest -m task2_4", + "input": "", + "output": "", + "comparison": "included", + "timeout": 10, + "points": 10 + } + ] +} \ No newline at end of file diff --git a/.github/workflows/classroom.yaml b/.github/workflows/classroom.yaml new file mode 100644 index 00000000..2853c181 --- /dev/null +++ b/.github/workflows/classroom.yaml @@ -0,0 +1,16 @@ +name: GitHub Classroom Workflow + +on: [push] + +permissions: + checks: write + actions: read + contents: read + +jobs: + build: + name: Autograding + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: education/autograding@v1 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9f521073..0230ad9c 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,5 @@ dmypy.json .pyre/ *.\#* data/ -pyodide \ No newline at end of file +pyodideModule2.md 
+module2-modernization.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05e8b56d..ae349ac8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v5.0.0 hooks: - id: check-added-large-files - id: check-case-conflict @@ -25,38 +25,21 @@ repos: - id: check-toml - id: debug-statements - id: mixed-line-ending - - id: requirements-txt-fixer - id: trailing-whitespace -- repo: https://github.com/timothycrosley/isort - rev: 5.10.1 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.9 hooks: - - id: isort + # Run the linter. + - id: ruff-check + args: [ --fix ] + # Run the formatter. + - id: ruff-format -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.971 +- repo: https://github.com/RobertCraigie/pyright-python + rev: v1.1.404 hooks: - - id: mypy - exclude: ^(docs/)|(project/)|(assignments/)|(project/interface/) - - -# Black, the code formatter, natively supports pre-commit -- repo: https://github.com/psf/black - rev: 22.6.0 - hooks: - - id: black - -# Flake8 also supports pre-commit natively (same author) -- repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 - hooks: - - id: flake8 - additional_dependencies: - - pep8-naming - exclude: ^(docs/)|(assignments/)|(project/interface/) - -# Doc linters -- repo: https://github.com/terrencepreilly/darglint - rev: v1.8.1 - hooks: - - id: darglint + - id: pyright + additional_dependencies: + - pytest + - hypothesis \ No newline at end of file diff --git a/README.md b/README.md index 9304eaab..86d30044 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,66 @@ +**Tensors** - Extending Autodifferentiation to Multidimensional Arrays * Docs: https://minitorch.github.io/ - * Overview: https://minitorch.github.io/module2/module2/ -This assignment requires the following files from the previous assignments. 
You can get these by running +## Overview + +Module 2 introduces **Tensors** - multidimensional arrays that extend the scalar autodifferentiation system from Module 1. While the scalar system is correct, it's inefficient due to Python overhead. Tensors solve this by grouping operations together and enabling faster implementations. + +## Installation + +See [installation.md](installation.md) for detailed setup instructions. + +## Quick Start + +```bash +# Install dependencies +pip install -e ".[dev,extra]" + +# Sync files from Module 1 +python sync_previous_module.py ../Module-1 . + +# Verify installation +python -c "import minitorch; print('Success!')" + +# Run tests +pytest -m task2_1 # Tensor data and indexing +pytest -m task2_2 # Tensor broadcasting +pytest -m task2_3 # Tensor operations +pytest -m task2_4 # Tensor autodifferentiation + +# Train tensor-based model +python project/run_tensor.py +``` + +## Tasks + +- **Task 2.1**: Implement tensor data structures with indexing and strides +- **Task 2.2**: Implement tensor broadcasting for operations between different shapes +- **Task 2.3**: Implement tensor operations (map, zip, reduce) and mathematical functions +- **Task 2.4**: Extend autodifferentiation to work with tensors and broadcasting +- **Task 2.5**: Create tensor-based neural network training + +## Testing + +See [testing.md](testing.md) for detailed testing instructions. + +## Files + +This assignment requires the following files from Module 1. You can get these by running: ```bash -python sync_previous_module.py previous-module-dir current-module-dir +python sync_previous_module.py ../Module-1 . 
``` The files that will be synced are: - minitorch/operators.py minitorch/module.py minitorch/autodiff.py minitorch/scalar.py minitorch/module.py project/run_manual.py project/run_scalar.py \ No newline at end of file +- `minitorch/operators.py` +- `minitorch/module.py` +- `minitorch/autodiff.py` +- `minitorch/scalar.py` +- `project/run_manual.py` +- `project/run_scalar.py` \ No newline at end of file diff --git a/installation.md b/installation.md new file mode 100644 index 00000000..2ae0765e --- /dev/null +++ b/installation.md @@ -0,0 +1,98 @@ +--- +hide: + - navigation +--- + +# MiniTorch Module 2 Installation + +MiniTorch requires Python 3.8 or higher. To check your version of Python, run: + +```bash +>>> python --version +``` + +We recommend creating a global MiniTorch workspace directory that you will use +for all modules: + +```bash +>>> mkdir workspace; cd workspace +``` + +## Environment Setup + +We highly recommend setting up a *virtual environment*. The virtual environment lets you install packages that are only used for your assignments and do not impact the rest of the system. + +**Option 1: Anaconda (Recommended)** +```bash +>>> conda create --name minitorch python # Run only once +>>> conda activate minitorch +>>> conda install llvmlite # For optimization +``` + +**Option 2: Venv** +```bash +>>> python -m venv venv # Run only once +>>> source venv/bin/activate +``` + +The first line should be run only once, whereas the second needs to be run whenever you open a new terminal to get started for the class. You can tell if it works by checking if your terminal starts with `(minitorch)` or `(venv)`. + +## Getting the Code + +Each assignment is distributed through a Git repo. Once you accept the assignment from GitHub Classroom, a personal repository under Cornell-Tech-ML will be created for you. You can then clone this repository to start working on your assignment. 
+ +```bash +>>> git clone {{ASSIGNMENT}} +>>> cd {{ASSIGNMENT}} +``` + +## Syncing Previous Module Files + +Module 2 requires files from Module 0 and Module 1. Sync them using: + +```bash +>>> python sync_previous_module.py <previous-module-dir> <current-module-dir> +``` + +Example: +```bash +>>> python sync_previous_module.py ../Module-1 . +``` + +Replace `<previous-module-dir>` with the path to your Module 1 directory and `<current-module-dir>` with `.` for the current directory. + +This will copy the following required files: +- `minitorch/operators.py` +- `minitorch/module.py` +- `minitorch/autodiff.py` +- `minitorch/scalar.py` +- `tests/test_module.py` +- `tests/test_operators.py` +- `tests/test_autodiff.py` +- `tests/test_scalar.py` +- `project/run_manual.py` +- `project/run_scalar.py` + +## Installation + +Install all packages in your virtual environment: + +```bash +>>> python -m pip install -e ".[dev,extra]" +``` + +## Verification + +Make sure everything is installed by running: + +```bash +>>> python -c "import minitorch; print('Success!')" +``` + +Verify that the tensor functionality is available: + +```bash +>>> python -c "from minitorch import tensor; print('Module 2 ready!')" +``` + +You're ready to start Module 2! 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..19ba3991 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,192 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "minitorch" +version = "0.6" +description = "A minimal deep learning library for educational purposes" +requires-python = ">=3.8" +dependencies = [ + "colorama==0.4.6", + "hypothesis==6.138.2", + "numba==0.61.2", + "numpy>=1.24,<2.3", + "pytest==8.4.1", + "pytest-env==1.1.5", + "typing_extensions", +] + +[project.optional-dependencies] +dev = [ + "pre-commit==4.3.0", +] +extra = [ + "datasets==2.4.0", + "embeddings==0.0.8", + "networkx==3.5", + "plotly==5.24.1", + "pydot==1.4.1", + "python-mnist", + "streamlit==1.48.1", + "streamlit-ace", + "torch==2.8.0", + "watchdog==1.0.2", +] + +[tool.pyright] +include = ["**/minitorch"] +exclude = [ + "**/docs", + "**/docs/module2/**", + "**/assignments", + "**/project", + "**/mt_diagrams", + "**/.*", + "*chainrule.py*", + "**/minitorch/autodiff.py", + "sync_previous_module.py", +] +venvPath = "." 
+venv = ".venv" +reportUnknownMemberType = "none" +reportUnknownParameterType = "none" +reportUnknownArgumentType = "none" +reportUnknownVariableType = "none" +reportMissingTypeArgument = "none" +reportMissingTypeStubs = "none" +reportUnusedExpression = "none" +reportUnknownLambdaType = "none" +reportIncompatibleMethodOverride = "none" +reportPrivateUsage = "none" +reportMissingParameterType = "error" +reportMissingImports = "none" + +[tool.pytest.ini_options] +markers = [ + "task0_0", + "task0_1", + "task0_2", + "task0_3", + "task0_4", + "task1_0", + "task1_1", + "task1_2", + "task1_3", + "task1_4", + "task2_0", + "task2_1", + "task2_2", + "task2_3", + "task2_4", + "task3_0", + "task3_1", + "task3_2", + "task3_3", + "task3_4", + "task4_0", + "task4_1", + "task4_2", + "task4_3", + "task4_4", +] +[tool.ruff] +exclude = [ + ".git", + "__pycache__", + "**/docs/slides/*", + "old,build", + "dist", + "**/project/**/*", + "**/mt_diagrams/*", + "**/minitorch/testing.py", + "**/docs/**/*", + "minitorch/optim.py", + "minitorch/datasets.py", + "minitorch/scalar.py", + "minitorch/autodiff.py", + "minitorch/module.py", + "minitorch/tensor.py", + "minitorch/tensor_data.py", + "minitorch/tensor_functions.py", + "minitorch/tensor_ops.py", + "sync_previous_module.py", +] + +[tool.ruff.lint] +ignore = [ + "ANN401", + "N801", + "E203", + "E266", + "E501", + "E741", + "N803", + "N802", + "N806", + "D400", + "D401", + "D105", + "D415", + "D402", + "D205", + "D100", + "D101", + "D107", + "D213", + "ANN204", + "D203" +] +select = ["D", "E", "F", "N", "ANN"] +fixable = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "I", + "N", + "Q", + "S", + "T", + "W", + "ANN", + "ARG", + "BLE", + "COM", + "DJ", + "DTZ", + "EM", + "ERA", + "EXE", + "FBT", + "ICN", + "INP", + "ISC", + "NPY", + "PD", + "PGH", + "PIE", + "PL", + "PT", + "PTH", + "PYI", + "RET", + "RSE", + "RUF", + "SIM", + "SLF", + "TCH", + "TID", + "TRY", + "UP", + "YTT", +] +unfixable = [] + 
+[tool.ruff.lint.extend-per-file-ignores] +"tests/**/*.py" = ["D"] +"minitorch/scalar_functions.py" = ["ANN001", "ANN201"] +"minitorch/tensor_functions.py" = ["ANN001", "ANN201"] \ No newline at end of file diff --git a/requirements.extra.txt b/requirements.extra.txt deleted file mode 100644 index a3df13c6..00000000 --- a/requirements.extra.txt +++ /dev/null @@ -1,10 +0,0 @@ -datasets==2.4.0 -embeddings==0.0.8 -networkx==2.4 -plotly==4.14.3 -pydot==1.4.1 -python-mnist -streamlit==1.12.0 -streamlit-ace -torch -watchdog==1.0.2 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 40efede3..00000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -colorama==0.4.3 -hypothesis == 6.54 -mypy == 0.971 -numba == 0.56 -numpy == 1.22 -pre-commit == 2.20.0 -pytest == 7.1.2 -pytest-env -pytest-runner == 5.2 -typing_extensions diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 8a5fbd84..00000000 --- a/setup.cfg +++ /dev/null @@ -1,61 +0,0 @@ -[metadata] -name=minitorch -version=0.4 - -[files] -packages = - minitorch - -[darglint] -ignore_regex=((^_(.*))|(.*map)|(.*zip)|(.*reduce)|(test.*)|(tensor_.*)) -docstring_style=google -strictness=long - -[flake8] -ignore = N801, E203, E266, E501, W503, F812, E741, N803, N802, N806 -exclude = .git,__pycache__,docs/*,old,build,dist - -[isort] -profile=black -src_paths=minitorch,test - -[mypy] -strict = True -ignore_missing_imports = True -exclude=^(docs/)|(project/)|(assignments/) -implicit_reexport = True - -[mypy-tests.*] -disallow_untyped_decorators = False -implicit_reexport = True - -[black] -exclude=^(docs/)|(project/)|(assignments/) - -[tool:pytest] -markers = - task0_0 - task0_1 - task0_2 - task0_3 - task0_4 - task1_0 - task1_1 - task1_2 - task1_3 - task1_4 - task2_0 - task2_1 - task2_2 - task2_3 - task2_4 - task3_0 - task3_1 - task3_2 - task3_3 - task3_4 - task4_0 - task4_1 - task4_2 - task4_3 - task4_4 diff --git a/setup.py b/setup.py deleted file mode 100644 index ff4cfa9f..00000000 
--- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup(py_modules=[]) diff --git a/sync_previous_module.py b/sync_previous_module.py index 5f682c89..2bbce42b 100644 --- a/sync_previous_module.py +++ b/sync_previous_module.py @@ -1,46 +1,72 @@ """ -Description: -Note: Make sure that both the new and old module files are in same directory! +Sync Previous Module Files -This script helps you sync your previous module works with current modules. -It takes 2 arguments, source_dir_name and destination_dir_name. -All the files which will be moved are specified in files_to_sync.txt as newline separated strings +This script helps you sync files from your previous module to the current module. +It copies files specified in 'files_to_sync.txt' from the source directory to the destination directory. -Usage: python sync_previous_module.py +Usage: python sync_previous_module.py -Ex: python sync_previous_module.py mle-module-0-sauravpanda24 mle-module-1-sauravpanda24 +Examples: + python sync_previous_module.py ./my-awesome-module-1 ./my-awesome-module-2 + python sync_previous_module.py ~/assignments/Module-1-unicorn_ninja ~/assignments/Module-2-unicorn_ninja """ import os import shutil import sys -if len(sys.argv) != 3: - print('Invalid argument count! 
Please pass source directory and destination directory after the file name') - sys.exit() +def print_usage(): + """Print usage information and examples.""" + print(__doc__) -# Get the users path to evaluate the username and root directory -current_path = os.getcwd() -grandparent_path = '/'.join(current_path.split('/')[:-1]) +def read_files_to_sync(): + """Read the list of files to sync from files_to_sync.txt""" + try: + with open("files_to_sync.txt", "r") as f: + return f.read().splitlines() + except FileNotFoundError: + print("Error: files_to_sync.txt not found!") + sys.exit(1) -print('Looking for modules in : ', grandparent_path) +def sync_files(source, dest, files_to_move): + """Copy files from source to destination directory.""" + if not os.path.exists(source): + print(f"Error: Source directory '{source}' does not exist!") + sys.exit(1) -# List of files which we want to move -f = open('files_to_sync.txt', 'r+') -files_to_move = f.read().splitlines() -f.close() + if not os.path.exists(dest): + print(f"Error: Destination directory '{dest}' does not exist!") + sys.exit(1) -# get the source and destination from arguments -source = sys.argv[1] -dest = sys.argv[2] - -# copy the files from source to destination -try: + copied_files = 0 for file in files_to_move: - print(f"Moving file : {file}") - shutil.copy( - os.path.join(grandparent_path, source, file), - os.path.join(grandparent_path, dest, file), - ) - print(f"Finished moving {len(files_to_move)} files") -except Exception: - print("Something went wrong! 
please check if the source and destination folders are present in same folder") + source_path = os.path.join(source, file) + dest_path = os.path.join(dest, file) + + if not os.path.exists(source_path): + print(f"Warning: File '{file}' not found in source directory, skipping") + continue + + try: + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + shutil.copy(source_path, dest_path) + print(f"Copied: {file}") + copied_files += 1 + except Exception as e: + print(f"Error copying '{file}': {e}") + + print(f"Finished copying {copied_files} files") + +def main(): + if len(sys.argv) != 3: + print("Error: Invalid number of arguments!") + print_usage() + sys.exit(1) + + source = sys.argv[1] + dest = sys.argv[2] + files_to_move = read_files_to_sync() + + sync_files(source, dest, files_to_move) + +if __name__ == "__main__": + main() diff --git a/testing.md b/testing.md new file mode 100644 index 00000000..cbf7b0d7 --- /dev/null +++ b/testing.md @@ -0,0 +1,146 @@ +## Testing Your Implementation + +### Running Tests + +This project uses pytest for testing. 
Tests are organized by task: + +```bash +# Run all tests for a specific task +pytest -m task2_1 # Tensor data and indexing +pytest -m task2_2 # Tensor broadcasting +pytest -m task2_3 # Tensor operations +pytest -m task2_4 # Tensor autodifferentiation + +# Run all tests +pytest + +# Run tests with verbose output +pytest -v + +# Run a specific test file +pytest tests/test_tensor_data.py # Tensor data structure tests +pytest tests/test_tensor.py # Tensor operations and autodiff tests +pytest tests/test_operators.py # Basic operators (from Module 0) +pytest tests/test_module.py # Module system tests (from Module 0) +pytest tests/test_scalar.py # Scalar tests (from Module 1) +pytest tests/test_autodiff.py # Autodiff tests (from Module 1) + +# Run a specific test function +pytest tests/test_tensor_data.py::test_index_to_position +pytest tests/test_tensor.py::test_tensor_sum +``` + +### Module 2 Specific Tests + +**Task 2.1 - Tensor Data:** +- Tests tensor indexing and storage management +- Verifies stride calculations and memory layout +- Checks permutation operations +- Tests `index_to_position` and `to_index` functions + +**Task 2.2 - Tensor Broadcasting:** +- Tests broadcasting rules for different tensor shapes +- Verifies `shape_broadcast` and `broadcast_index` functions +- Checks edge cases with dimension alignment +- Tests operations between tensors of different sizes + +**Task 2.3 - Tensor Operations:** +- Tests high-level tensor operations (map, zip, reduce) +- Verifies mathematical functions (add, mul, sigmoid, relu, etc.) 
+- Checks tensor creation and manipulation +- Tests tensor properties and methods + +**Task 2.4 - Tensor Autodifferentiation:** +- Tests gradient computation through tensor operations +- Verifies backpropagation with broadcasting +- Checks gradient accumulation and chain rule +- Tests complex computational graphs with tensors + +### Style and Code Quality Checks + +This project enforces code style and quality using several tools: + +```bash +# Run all pre-commit hooks (recommended) +pre-commit run --all-files + +# Individual style checks: +ruff check . # Linting (style, imports, docstrings) +ruff format . # Code formatting +pyright . # Type checking +``` + +### Understanding Test Output + +**Property Testing with Hypothesis:** +- Tests use hypothesis to generate random tensor shapes and values +- If a test fails, Hypothesis will show you the minimal failing example +- This helps you understand edge cases in your tensor implementation + +**Common Test Failures:** +- `AssertionError`: Your function returned an unexpected tensor or gradient +- `TypeError`: Missing or incorrect type annotations +- `ImportError`: Function not implemented or incorrectly named +- `AttributeError`: Missing methods in tensor classes +- `IndexingError`: Issues with tensor indexing or broadcasting + +**Gradient Testing:** +- Many tests compare your computed gradients against numerical approximations +- Small differences (< 1e-5) are usually acceptable due to floating point precision +- Large differences indicate errors in your derivative implementations + +**Broadcasting Errors:** +- Tests will check that tensors with incompatible shapes raise appropriate errors +- Verify that your broadcasting functions handle edge cases correctly + +### Task 2.5 - Training + +**Training Script:** +```bash +# Run tensor-based training +python project/run_tensor.py +``` + +**Expected Output:** +- Should train faster than scalar implementation +- Record time per epoch for performance comparison +- Train on all 
datasets: Simple, Diag, Split, Xor + +### Pre-commit Hooks (Automatic Style Checking) + +The project uses pre-commit hooks that run automatically before each commit: + +```bash +# Install pre-commit hooks (one-time setup) +pre-commit install + +# Now style checks run automatically on every commit +git commit -m "your message" # Will run style checks first +``` + +### GitHub Classroom Autograder + +The autograder runs the same tests and style checks: + +1. **Style Check (10 points)**: All pre-commit hooks must pass +2. **Task 2.1 (15 points)**: Tensor data and indexing implementation +3. **Task 2.2 (15 points)**: Tensor broadcasting implementation +4. **Task 2.3 (15 points)**: Tensor operations implementation +5. **Task 2.4 (15 points)**: Tensor autodifferentiation implementation +6. **Task 2.5 (30 points)**: Training and performance verification + +### Debugging Tools + +**Interactive Debugging:** +```bash +# Launch tensor visualization app +streamlit run project/app.py -- 2 + +# Test specific tensor operations +python -c "from minitorch import tensor; t = tensor([1,2,3]); print(t)" +``` + +**Performance Testing:** +- Compare training times between scalar and tensor implementations +- Verify that tensor operations are significantly faster +- Monitor memory usage with larger tensor operations \ No newline at end of file diff --git a/tests/tensor_strategies.py b/tests/tensor_strategies.py index e3186b86..edd3d152 100644 --- a/tests/tensor_strategies.py +++ b/tests/tensor_strategies.py @@ -106,7 +106,6 @@ def matmul_tensors( allow_nan=False, min_value=-100, max_value=100 ), ) -> List[Tensor]: - i, j, k = [draw(integers(min_value=1, max_value=10)) for _ in range(3)] l1 = (i, j)