diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..971059d3
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,11 @@
+.git
+build
+__pycache__
+*.pyc
+*.pyo
+*.egg-info
+*.egg
+dist
+*.csv
+*.jpg
+*.png
diff --git a/README.md b/README.md
index c646c6bf..8495eb5e 100644
--- a/README.md
+++ b/README.md
@@ -96,20 +96,64 @@ Benchmark result on the following configurations:
 
 ## Installation
 
-### Prerequisites
+### Docker (recommended for MI355X)
 
-- pytorch:rocm >= 6.4.0
-- Linux packages: see packages in dockerfile
+Build a self-contained image with all dependencies pre-installed:
 
-Or build docker image with:
+```bash
+cd mori
+docker build -t rocm/mori:benchmark -f docker/Dockerfile.dev .
+```
+
+Launch a container with GPU access:
+
+```bash
+bash docker/run_benchmark.sh
+# or manually:
+docker run --rm -it \
+  --device=/dev/kfd --device=/dev/dri \
+  --ipc=host --security-opt seccomp=unconfined \
+  --cap-add=SYS_PTRACE --group-add video --group-add render \
+  rocm/mori:benchmark bash
 ```
-cd mori && docker build -t rocm/mori:dev -f docker/Dockerfile.dev .
+
+Run CCL benchmarks inside the container:
+
+```bash
+# AllReduce sweep (2-256 MB)
+bash tests/python/ccl/bench_allreduce_sweep.sh
+
+# AllGather / ReduceScatter standalone latency sweep
+bash tests/python/ccl/bench_allgather_sweep.sh
+bash tests/python/ccl/bench_reducescatter_sweep.sh
+
+# AllGather / ReduceScatter + GEMM overlap sweep
+bash tests/python/ccl/bench_ag_overlap_sweep.sh
+bash tests/python/ccl/bench_rs_overlap_sweep.sh
 ```
 
-### Install with Python
+> **Note**: The default `MORI_GPU_ARCHS` is `gfx950` (MI355X). To build for MI300X, pass `--build-arg MORI_GPU_ARCHS=gfx942`.
+
+### Install without Docker
+
+Prerequisites:
+- PyTorch with ROCm (version must match your system ROCm, e.g. `pip install torch --index-url https://download.pytorch.org/whl/rocm7.1`)
+- Linux packages: `git`, `cython3`, `ibverbs-utils`, `openmpi-bin`, `libopenmpi-dev`, `libpci-dev`, `cmake`, `libdw1`, `locales`
+- For GEMM overlap tests: `pip install amd-aiter ninja`
+
+```bash
+cd mori
+pip install -r requirements-build.txt
+git submodule update --init --recursive
+export MORI_GPU_ARCHS=gfx950 # or gfx942 for MI300X
+pip3 install . # add --no-build-isolation if using venv
 ```
-# NOTE: for venv build, add --no-build-isolation at the end
-cd mori && pip install -r requirements-build.txt && git submodule update --init --recursive && pip3 install .
+
+Required environment variables:
+
+```bash
+export PYTHONPATH=/path/to/mori${PYTHONPATH:+:$PYTHONPATH}
+export HSA_NO_SCRATCH_RECLAIM=1
 ```
 
 ### Test dispatch / combine
diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev
index fcd330a6..cbfdae22 100644
--- a/docker/Dockerfile.dev
+++ b/docker/Dockerfile.dev
@@ -1,5 +1,4 @@
-# FROM rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.12_pytorch_release_2.5.1
-FROM rocm/pytorch:rocm6.4.3_ubuntu22.04_py3.10_pytorch_release_2.5.1
+FROM rocm/pytorch:rocm7.1.1_ubuntu22.04_py3.10_pytorch_release_2.10.0
 
 RUN apt-get update && \
     apt-get install -y \
@@ -11,4 +10,29 @@ RUN apt-get update && \
     libpci-dev \
     cmake \
     libdw1 \
-    locales
+    locales && \
+    rm -rf /var/lib/apt/lists/*
+
+# Python build and benchmark dependencies (the README lists amd-aiter and ninja for the GEMM overlap tests).
+RUN pip install --no-cache-dir \
+    amd-aiter ninja prettytable pytest-assume \
+    setuptools-scm cmake 'setuptools>=65'
+
+COPY . /workspace/mori
+WORKDIR /workspace/mori
+
+# Target GPU architecture; pass --build-arg MORI_GPU_ARCHS=gfx942 to build for MI300X.
+ARG MORI_GPU_ARCHS=gfx950
+# .dockerignore drops .git from the build context; presumably this fixed version stands in for the one setuptools-scm would read from git.
+ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.1.0
+RUN MORI_GPU_ARCHS=${MORI_GPU_ARCHS} \
+    PYTORCH_ROCM_ARCH=${MORI_GPU_ARCHS} \
+    pip3 install . --no-cache-dir --no-build-isolation
+
+# Append the inherited PYTHONPATH only when it is non-empty: a trailing ':' would leave an
+# empty entry, which Python resolves to the current working directory.
+ENV PYTHONPATH=/workspace/mori${PYTHONPATH:+:${PYTHONPATH}}
+ENV HSA_NO_SCRATCH_RECLAIM=1
+ENV PATH=/root/.local/bin:${PATH}
+
+CMD ["/bin/bash"]
diff --git a/docker/run_benchmark.sh b/docker/run_benchmark.sh
new file mode 100755
index 00000000..822154f1
--- /dev/null
+++ b/docker/run_benchmark.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Start an interactive bash shell in the MORI benchmark image with /dev/kfd and /dev/dri passed through.
+# Usage: bash docker/run_benchmark.sh [IMAGE_NAME]   (IMAGE_NAME defaults to rocm/mori:benchmark)
+set -e
+
+IMAGE_NAME="${1:-rocm/mori:benchmark}"
+
+# --rm removes the container when the shell exits; -it requires an interactive terminal.
+docker run --rm -it \
+    --device=/dev/kfd \
+    --device=/dev/dri \
+    --ipc=host \
+    --security-opt seccomp=unconfined \
+    --cap-add=SYS_PTRACE \
+    --group-add video \
+    --group-add render \
+    "$IMAGE_NAME" \
+    bash