From 908d622269d75be5d3cd671fcf84d2d285ac4994 Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Wed, 9 Apr 2025 18:06:12 -0500 Subject: [PATCH 1/6] feat: pytorch 2.6.0, version pinning --- official-templates/base/Dockerfile | 2 +- official-templates/pytorch/Dockerfile | 2 +- official-templates/pytorch/README.md | 2 +- official-templates/pytorch/docker-bake.hcl | 30 +++++++++++++++++----- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/official-templates/base/Dockerfile b/official-templates/base/Dockerfile index 6b2afa1..0d44a3d 100644 --- a/official-templates/base/Dockerfile +++ b/official-templates/base/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE +ARG BASE_IMAGE=non-existing FROM ${BASE_IMAGE} SHELL ["/bin/bash", "-o", "pipefail", "-c"] diff --git a/official-templates/pytorch/Dockerfile b/official-templates/pytorch/Dockerfile index 11d6aa8..d5525b7 100644 --- a/official-templates/pytorch/Dockerfile +++ b/official-templates/pytorch/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE +ARG BASE_IMAGE=non-existing FROM ${BASE_IMAGE} ARG TORCH diff --git a/official-templates/pytorch/README.md b/official-templates/pytorch/README.md index 26c644e..0e66adf 100644 --- a/official-templates/pytorch/README.md +++ b/official-templates/pytorch/README.md @@ -7,7 +7,7 @@ Example: ```bash -cd containers/official-templates/pytorch +cd official-templates/pytorch docker buildx bake 280-py311-cuda1281-cudnn-devel-ubuntu2204 --set 280-py311-cuda1281-cudnn-devel-ubuntu2204.platform=linux/amd64 ``` diff --git a/official-templates/pytorch/docker-bake.hcl b/official-templates/pytorch/docker-bake.hcl index ccaad8f..d3a3a05 100644 --- a/official-templates/pytorch/docker-bake.hcl +++ b/official-templates/pytorch/docker-bake.hcl @@ -19,6 +19,7 @@ group "default" { # CUDA 12.4.1 "240-py311-cuda1241-devel-ubuntu2204", "251-py311-cuda1241-devel-ubuntu2204", + "260-py311-cuda1241-devel-ubuntu2204", # CUDA 12.8.1 "280-py311-cuda1281-cudnn-devel-ubuntu2204", @@ -63,6 +64,8 @@ group "cuda" { "220-py310-cuda1211-devel-ubuntu2204", "221-py310-cuda1211-devel-ubuntu2204", "240-py311-cuda1241-devel-ubuntu2204", + "251-py311-cuda1241-devel-ubuntu2204", + "260-py311-cuda1241-devel-ubuntu2204", "280-py311-cuda1281-cudnn-devel-ubuntu2204", ] } @@ -149,7 +152,7 @@ target "211-py310-cuda1211-devel-ubuntu2204" { target "220-py310-cuda1211-devel-ubuntu2204" { dockerfile = "Dockerfile" - tags = ["${PUBLISHER}/pytorch:2.2.0-py3.10-cuda12.1.1-devel-ubuntu22.04"] + tags = ["${PUBLISHER}/pytorch:2.2.0-py3.10-cuda12.1.2511-devel-ubuntu22.04"] contexts = { scripts = "../../container-template" proxy = "../../container-template/proxy" @@ -158,7 +161,7 @@ target "220-py310-cuda1211-devel-ubuntu2204" { args = { BASE_IMAGE = "nvidia/cuda:12.1.1-devel-ubuntu22.04" PYTHON_VERSION = "3.10" - TORCH = "pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0" + TORCH = "torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0" } } @@ -173,7 +176,7 @@ target "221-py310-cuda1211-devel-ubuntu2204" { args = { BASE_IMAGE = "nvidia/cuda:12.1.1-devel-ubuntu22.04" PYTHON_VERSION = "3.10" - TORCH = "torch torchvision torchaudio" + TORCH = "torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121" } } @@ -188,7 +191,7 @@ target "240-py311-cuda1241-devel-ubuntu2204" { args = { BASE_IMAGE = "nvidia/cuda:12.4.1-devel-ubuntu22.04" PYTHON_VERSION = "3.11" - TORCH = "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124" + TORCH = "torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124" } } @@ -204,7 +207,22 @@ target "251-py311-cuda1241-devel-ubuntu2204" { args = { BASE_IMAGE = "nvidia/cuda:12.4.1-devel-ubuntu22.04" PYTHON_VERSION = "3.11" - TORCH = "torch torchvision torchaudio" + TORCH = "torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1" + } +} + +target "260-py311-cuda1241-devel-ubuntu2204" { + dockerfile = "Dockerfile" + tags = ["${PUBLISHER}/pytorch:2.6.0-py3.11-cuda12.4.1-devel-ubuntu22.04"] + contexts = { + scripts = "../../container-template" + proxy = "../../container-template/proxy" + logo = "../../container-template" + } + args = { + BASE_IMAGE = "nvidia/cuda:12.4.1-devel-ubuntu22.04" + PYTHON_VERSION = "3.11" + TORCH = "torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0" } } @@ -219,7 +237,7 @@ target "280-py311-cuda1281-cudnn-devel-ubuntu2204" { args = { BASE_IMAGE = "nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04" PYTHON_VERSION = "3.11" - TORCH = "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128" + TORCH = "--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128" } } From c25a57bddfe454dd7f3bf9e0f00ef82fd5043724 Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Wed, 9 Apr 2025 18:10:54 -0500 Subject: [PATCH 2/6] fix: stray version change --- official-templates/pytorch/docker-bake.hcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/official-templates/pytorch/docker-bake.hcl b/official-templates/pytorch/docker-bake.hcl index d3a3a05..5ddf874 100644 --- a/official-templates/pytorch/docker-bake.hcl +++ b/official-templates/pytorch/docker-bake.hcl @@ -152,7 +152,7 @@ target "211-py310-cuda1211-devel-ubuntu2204" { target "220-py310-cuda1211-devel-ubuntu2204" { dockerfile = "Dockerfile" - tags = ["${PUBLISHER}/pytorch:2.2.0-py3.10-cuda12.1.2511-devel-ubuntu22.04"] + tags = ["${PUBLISHER}/pytorch:2.2.0-py3.10-cuda12.1.1-devel-ubuntu22.04"] contexts = { scripts = "../../container-template" proxy = "../../container-template/proxy" From c4a5acdbdf5537380b645af0aa09104c8733e76a Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Wed, 9 Apr 2025 22:35:28 -0500 Subject: [PATCH 3/6] fix: update workflow --- .github/workflows/base.yml | 6 ++++-- official-templates/base/Dockerfile | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index c647cb5..ff42b53 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest-public-m steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Clear space to remove unused folders run: | @@ -56,10 +56,12 @@ jobs: fi - name: Build and push the images to Docker Hub - uses: docker/bake-action@v2 + uses: docker/bake-action@v6 with: + source: . files: ./official-templates/base/docker-bake.hcl push: true set: | + allow=fs.read=/home/runner/work ${{ steps.build_type.outputs.is_production != 'true' && format('*.args.RELEASE={0}', env.RELEASE) || '' }} *.args.GITHUB_WORKSPACE=${{ github.workspace }} diff --git a/official-templates/base/Dockerfile b/official-templates/base/Dockerfile index 0d44a3d..e6860a9 100644 --- a/official-templates/base/Dockerfile +++ b/official-templates/base/Dockerfile @@ -208,3 +208,4 @@ RUN echo 'cat /etc/runpod.txt' >> /root/.bashrc RUN echo 'echo -e "\nFor detailed documentation and guides, please visit:\n\033[1;34mhttps://docs.runpod.io/\033[0m and \033[1;34mhttps://blog.runpod.io/\033[0m\n\n"' >> /root/.bashrc CMD ["/start.sh"] + From fa366968d41087986ec6d21b82434fb47844cfde Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Wed, 9 Apr 2025 22:41:24 -0500 Subject: [PATCH 4/6] ci: set buildx var elsewhere --- .github/workflows/base.yml | 7 +++---- official-templates/base/Dockerfile | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index ff42b53..51a2309 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -5,6 +5,9 @@ on: paths: - "official-templates/base/**" +env: + BUILDX_BAKE_ENTITLEMENTS_FS: 0 + jobs: build: runs-on: ubuntu-latest-public-m @@ -42,9 +45,6 @@ jobs: - name: Set environment variables run: | - # Set environment variable to disable entitlements checks - echo "BUILDX_BAKE_ENTITLEMENTS_FS=0" >> $GITHUB_ENV - # Only set RELEASE for development builds # For production, we'll use the default from docker-bake.hcl if [[ "${{ steps.build_type.outputs.is_production }}" != "true" ]]; then @@ -62,6 +62,5 @@ jobs: files: ./official-templates/base/docker-bake.hcl push: true set: | - allow=fs.read=/home/runner/work ${{ steps.build_type.outputs.is_production != 'true' && format('*.args.RELEASE={0}', env.RELEASE) || '' }} *.args.GITHUB_WORKSPACE=${{ github.workspace }} diff --git a/official-templates/base/Dockerfile b/official-templates/base/Dockerfile index e6860a9..0d44a3d 100644 --- a/official-templates/base/Dockerfile +++ b/official-templates/base/Dockerfile @@ -208,4 +208,3 @@ RUN echo 'cat /etc/runpod.txt' >> /root/.bashrc RUN echo 'echo -e "\nFor detailed documentation and guides, please visit:\n\033[1;34mhttps://docs.runpod.io/\033[0m and \033[1;34mhttps://blog.runpod.io/\033[0m\n\n"' >> /root/.bashrc CMD ["/start.sh"] - From 4bb7cc4405612bca9a84f8c96776dc5177f19d64 Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Wed, 9 Apr 2025 22:47:29 -0500 Subject: [PATCH 5/6] ci: downgrade buildx run command --- .github/workflows/base.yml | 6 +++--- official-templates/base/Dockerfile | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 51a2309..d792451 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -5,8 +5,6 @@ on: paths: - "official-templates/base/**" -env: - BUILDX_BAKE_ENTITLEMENTS_FS: 0 jobs: build: @@ -56,7 +54,9 @@ jobs: fi - name: Build and push the images to Docker Hub - uses: docker/bake-action@v6 + uses: docker/bake-action@v2 # latest: v6 + env: + BUILDX_BAKE_ENTITLEMENTS_FS: 0 with: source: . files: ./official-templates/base/docker-bake.hcl diff --git a/official-templates/base/Dockerfile b/official-templates/base/Dockerfile index 0d44a3d..e6860a9 100644 --- a/official-templates/base/Dockerfile +++ b/official-templates/base/Dockerfile @@ -208,3 +208,4 @@ RUN echo 'cat /etc/runpod.txt' >> /root/.bashrc RUN echo 'echo -e "\nFor detailed documentation and guides, please visit:\n\033[1;34mhttps://docs.runpod.io/\033[0m and \033[1;34mhttps://blog.runpod.io/\033[0m\n\n"' >> /root/.bashrc CMD ["/start.sh"] + From 4fcf422ec918de1508ca85567bd9af5b52f8580e Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Thu, 10 Apr 2025 11:25:00 -0500 Subject: [PATCH 6/6] ci: undo most changes to ci --- .github/workflows/base.yml | 2 +- README.md | 2 +- official-templates/base/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index d792451..8d34f32 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest-public-m steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v3 - name: Clear space to remove unused folders run: | diff --git a/README.md b/README.md index d12535e..6013643 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ The following dependencies are required as part of RunPod platform functionality ### runpod.yaml -Each container foulder needs to have a runpod.yaml file. This file will contain version info as well as services to be ran. The runpod.yaml file should be formatted as follows: +Each container folder needs to have a runpod.yaml file. This file will contain version info as well as services to be ran. The runpod.yaml file should be formatted as follows: ```yaml version: '1.0.0' diff --git a/official-templates/base/Dockerfile b/official-templates/base/Dockerfile index e6860a9..797f4ea 100644 --- a/official-templates/base/Dockerfile +++ b/official-templates/base/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=non-existing +ARG BASE_IMAGE FROM ${BASE_IMAGE} SHELL ["/bin/bash", "-o", "pipefail", "-c"]