diff --git a/README.md b/README.md index 817878ef..e96bf63b 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,11 @@ Make sure to go through the [contributing guide](https://github.com/DataBiospher [terra-jupyter-bioconductor](terra-jupyter-bioconductor/README.md) +## (Small base images for developers) +[terra-jupyter-minimal-base](terra-jupyter-minimal-base/README.md) + +[terra-jupyter-minimal-gpu-base](terra-jupyter-minimal-gpu-base/README.md) + # How to create your own Custom image to use with notebooks on Terra Custom docker images need to use a Terra base image (see above) in order to work with the service that runs notebooks on Terra. * You can use any of the base images above @@ -29,6 +34,7 @@ Custom docker images need to use a Terra base image (see above) in order to work * Since 6/28/2021, we introduced a few changes that might impact building custom images - Home directory of new images will be `/home/jupyter`. This means if your dockerfile is referencing `/home/jupyter-user` directory, you need to update it to $HOME (recommended) or `/home/jupyter`. - Creating VMs with custom images will take much longer than terra supported images because `docker pull` will take a few min. If the custom image ends up being too large, VM creation may time out. New base images are much larger in size than previous versions. 
+ - Consider using the "minimal" base images # Development ## Using git secrets diff --git a/terra-jupyter-minimal-base/CHANGELOG.md b/terra-jupyter-minimal-base/CHANGELOG.md new file mode 100644 index 00000000..cf3da776 --- /dev/null +++ b/terra-jupyter-minimal-base/CHANGELOG.md @@ -0,0 +1,37 @@ +## 0.0.3 - 11/08/2023 + +- Extends Ubuntu 20.04 base image +- OS prerequisites +- google-cloud-cli +- Python 3.10 +- Miniconda + - installs libmamba solver and makes it default ( + [see here](https://www.anaconda.com/blog/a-faster-conda-for-a-growing-community/)) +- Mamba +- Jupyter +- Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) + +## 0.0.2 - 08/30/2023 + +- Extends Ubuntu 20.04 base image +- OS prerequisites +- google-cloud-cli +- Python 3.10 +- Miniconda +- Mamba +- Jupyter +- Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) + +## 0.0.1 - 08/30/2022 + +- Extends Ubuntu 20.04 base image +- Add google-cloud-cli +- Add Python 3.7 +- Add Miniconda +- Add Jupyter +- Add Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) + +Image URL: `us.gcr.io/broad-dsde-methods/terra-jupyter-minimal-base:0.0.1` diff --git a/terra-jupyter-minimal-base/Dockerfile b/terra-jupyter-minimal-base/Dockerfile new file mode 100644 index 00000000..68e1f5bf --- /dev/null +++ b/terra-jupyter-minimal-base/Dockerfile @@ -0,0 +1,147 @@ +ARG BASE_IMAGE="ubuntu:20.04" + +FROM ${BASE_IMAGE} + +USER root + +# The welder uid is consistent with the Welder docker definition here: +# https://github.com/DataBiosphere/welder/blob/master/project/Settings.scala +# Adding welder-user to the Jupyter container isn't strictly required, but it makes welder-added +# files display nicer when viewed in a terminal. 
+ENV DEBIAN_FRONTEND=noninteractive \
+    LC_ALL=en_US.UTF-8 \
+    GOOGLE_CLOUD_CLI_VERSION="454.0.0" \
+    USER=jupyter \
+    WELDER_USER=welder-user \
+    WELDER_UID=1001 \
+    # ensure this matches c.NotebookApp.port in jupyter_notebook_config.py
+    JUPYTER_PORT=8000 \
+    JUPYTER_HOME=/etc/jupyter \
+    MINICONDA_VERSION="py310_23.5.2-0" \
+    CONDA_AUTO_UPDATE_CONDA=false \
+    CONDA_DIR=/opt/conda \
+    GCLOUD_DIR=/opt/gcloud
+ENV HOME=/home/$USER
+    # When using PIP_USER=true packages are installed into Python site.USER_BASE, which is '/home/jupyter' for this system.
+    # Append '/home/jupyter/.local/bin' to PATH
+    # pip docs: https://pip.pypa.io/en/stable/reference/pip_install/#cmdoption-user
+ENV PATH="${GCLOUD_DIR}/google-cloud-sdk/bin:${CONDA_DIR}/bin:${HOME}/.local/bin:${HOME}/packages/bin:${PATH}"
+
+COPY requirements.txt /opt/
+COPY requirements_gcc.txt /opt/
+COPY gcc_pkgs.txt /opt/
+
+# Users
+RUN useradd -m -s /bin/bash $USER \
+    && usermod -g users $USER \
+    && useradd -m -s /bin/bash -N -u $WELDER_UID $WELDER_USER \
+# Prerequisites
+    # Ubuntu 18.04 (bionic) repos: gcc_pkgs.txt lists libexempi3 and libv8-3.14-dev, which are
+    # presumably pulled from these (TODO confirm). The build already runs as root, so no sudo here.
+    && echo "deb http://security.ubuntu.com/ubuntu/ bionic main" >> /etc/apt/sources.list \
+    && echo "deb http://us.archive.ubuntu.com/ubuntu/ bionic universe" >> /etc/apt/sources.list \
+    && apt-get update && apt-get install -yq --no-install-recommends \
+        # not needed by this build (it runs as root); kept in the image as before
+        sudo \
+        # gnupg requirement
+        dirmngr \
+        gnupg \
+        # curl requirement
+        curl \
+        ca-certificates \
+        # useful utilities for debugging within the docker
+        nano \
+        procps \
+        # extras
+        wget \
+        bzip2 \
+        # install script requirements
+        locales \
+        # for ssh-agent and ssh-add
+        keychain \
+        # git
+        git \
+    # Uncomment en_US.UTF-8 for inclusion in generation
+    && sed -i 's/^# *\(en_US.UTF-8\)/\1/' /etc/locale.gen \
+    # Generate locale
+    && locale-gen \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+# Install miniconda to $CONDA_DIR
+    # curl -f fails the build on an HTTP error instead of saving the error page as the download
+    && curl -fsSLo $HOME/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh \
+    && chmod +x $HOME/miniconda.sh \
+    && $HOME/miniconda.sh -b -p $CONDA_DIR \
+    && rm $HOME/miniconda.sh \
+    && conda install -y -n base -c conda-forge conda-libmamba-solver==23.5.0 \
+    && conda config --set solver libmamba \
+# Install gsutil with compiled crcmod
+    && mkdir -p $GCLOUD_DIR \
+    && curl -fsSLo $GCLOUD_DIR/google-cloud-cli.tar.gz \
+        https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-${GOOGLE_CLOUD_CLI_VERSION}-linux-x86_64.tar.gz \
+    && tar -xzf $GCLOUD_DIR/google-cloud-cli.tar.gz -C $GCLOUD_DIR \
+    && rm $GCLOUD_DIR/google-cloud-cli.tar.gz \
+    && $GCLOUD_DIR/google-cloud-sdk/install.sh --install-python false --usage-reporting false --rc-path $HOME/.bashrc \
+    # manual symlinks since .bashrc is not getting sourced and PATH is getting overwritten in Terra Cloud Environment
+    && ln -s $GCLOUD_DIR/google-cloud-sdk/bin/gsutil /usr/bin/gsutil \
+    && ln -s $GCLOUD_DIR/google-cloud-sdk/bin/anthoscli /usr/bin/anthoscli \
+    && ln -s $GCLOUD_DIR/google-cloud-sdk/bin/bq /usr/bin/bq \
+    && ln -s $GCLOUD_DIR/google-cloud-sdk/bin/docker-credential-gcloud /usr/bin/docker-credential-gcloud \
+    && ln -s $GCLOUD_DIR/google-cloud-sdk/bin/gcloud /usr/bin/gcloud \
+    # creates the directory $HOME/.config/gcloud/configurations
+    && conda install -y -c conda-forge crcmod \
+    && gsutil version -l \
+    && chown -R $USER:users $HOME/.conda \
+    && chown -R $USER:users $HOME/.config/gcloud \
+# Slim install of python packages that have a gcc dependency (cleanup included)
+    && apt-get update && apt-get install -yq --no-install-recommends \
+        $(cat /opt/gcc_pkgs.txt) \
+    && pip3 install -r /opt/requirements_gcc.txt \
+    && apt-get purge -y --auto-remove \
+        $(cat /opt/gcc_pkgs.txt) \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+# Install jupyter and some necessary python packages
+    # tmp hack min-5
+    # I'm not installing jupyterlab and I can't update init-actions.sh to not access it
+    && mkdir -p /usr/local/share/jupyter/lab \
+    # When we upgraded from jupyter 5.7.8 to 6.1.1, we broke terminal button on terra-ui.
+    # Hence, make sure to manually test out "launch terminal" button (the button in the green bar next to start and stop buttons)
+    # to make sure we don't accidentally break it every time we upgrade notebook version until we figure out an automation test for this
+    && pip3 install -r /opt/requirements.txt \
+# Copy workspace_cromwell.py script and make it runnable by all users
+    && curl -fsSLo /usr/local/bin/workspace_cromwell.py \
+        https://raw.githubusercontent.com/broadinstitute/cromwhelm/1ceedf89587cffd355f37401b179001f029f77ed/scripts/workspace_cromwell.py \
+    && chmod +x /usr/local/bin/workspace_cromwell.py \
+# Mamba
+    && conda install -y -c conda-forge mamba==1.5.1 \
+    && conda install -y -c conda-forge libarchive==3.6.2 requests==2.31.0 \
+# Enable dropdown menu showing all conda envs as jupyter kernels
+    && conda install -y -c anaconda ipykernel \
+# Cleanup
+    && rm -rf ~/.cache/pip \
+    && conda clean -yaf \
+    && chown -R $USER:users $CONDA_DIR
+
+# Utilities
+COPY scripts $JUPYTER_HOME/scripts
+COPY custom $JUPYTER_HOME/custom
+COPY jupyter_notebook_config.py $JUPYTER_HOME
+
+RUN chown -R $USER:users $JUPYTER_HOME \
+    && find $JUPYTER_HOME/scripts -name '*.sh' -type f -exec chmod +x {} + \
+    # You can get kernel directory by running `jupyter kernelspec list`
+    && $JUPYTER_HOME/scripts/kernel/kernelspec.sh $JUPYTER_HOME/scripts/kernel $CONDA_DIR/share/jupyter/kernels
+
+# make pip install to a user directory, instead of a system directory which requires root.
+# this is useful so `pip install` commands can be run in the context of a notebook.
+ENV PIP_USER=true
+USER $USER
+EXPOSE $JUPYTER_PORT
+WORKDIR $HOME
+
+# Note: this entrypoint is provided for running Jupyter independently of Leonardo.
+# When Leonardo deploys this image onto a cluster, the entrypoint is overwritten to enable +# additional setup inside the container before execution. Jupyter execution occurs when the +# init-actions.sh script uses 'docker exec' to call run-jupyter.sh. +ENTRYPOINT ["jupyter", "notebook"] diff --git a/terra-jupyter-minimal-base/README.md b/terra-jupyter-minimal-base/README.md new file mode 100644 index 00000000..76ea64a9 --- /dev/null +++ b/terra-jupyter-minimal-base/README.md @@ -0,0 +1,39 @@ +# terra-jupyter-minimal-base image + +This repo contains the terra-jupyter-minimal-base image that is compatible with +the notebook service in [Terra](https://app.terra.bio/) called Leonardo. + +## Image contents + +`terra-jupyter-minimal-base` extends an Ubuntu 20.04 base image with the minimum +requirements necessary to set up Jupyter and provide compatibility with Leonardo. + +- OS prerequisites +- google-cloud-cli +- Python 3.10 +- Miniconda +- Mamba +- Jupyter +- Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) +- Full list of python packages is available [here](requirements.txt) and + [here](requirements_gcc.txt) + +To see the complete contents of this image please see the [Dockerfile](./Dockerfile). + +## Notes + +- Currently, the environment variable `WORKSPACE_BUCKET` is not getting set + correctly when this image is used to create a Cloud Environment in Terra + - The value is `'.'` + - This will be fixed in future versions + - For now, it's recommended to avoid using this environment variable + +## Selecting prior versions of this image + +To select an older version of this image, you can search the [CHANGELOG.md](./CHANGELOG.md) +for a specific package version you need. + +Once you find an image version that you want, simply copy and paste the image +url from the changelog into the corresponding custom docker field in the Terra +notebook runtime widget.
diff --git a/terra-jupyter-minimal-base/build_docker.sh b/terra-jupyter-minimal-base/build_docker.sh
new file mode 100755
index 00000000..f4605ee7
--- /dev/null
+++ b/terra-jupyter-minimal-base/build_docker.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Builds the terra-jupyter-minimal-base image.
+#
+# custom/, scripts/ and jupyter_notebook_config.py are copied in from ../terra-jupyter-base/
+# because there are requirements here and in ../terra-jupyter-base/, but the build context
+# cannot be both. The copies are removed again when the script exits.
+
+set -euo pipefail
+
+# the relative paths below only resolve from this script's own directory
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# remove the copied files; also runs (via trap) when a copy or the build fails
+cleanup() {
+    rm -rf custom scripts jupyter_notebook_config.py
+}
+trap cleanup EXIT
+
+# start clean: `cp -r` into a leftover custom/ or scripts/ would nest the copy inside it
+cleanup
+cp -r ../terra-jupyter-base/custom custom
+cp -r ../terra-jupyter-base/scripts scripts
+cp -r ../terra-jupyter-base/jupyter_notebook_config.py jupyter_notebook_config.py
+
+# under `set -e` a failed build is the script's exit status (previously masked by the trailing rm)
+docker build -t terra-jupyter-minimal-base:0.0.3 .
+
+# if requirements.txt, requirements_gcc.txt, and gcc_pkgs.txt get moved to the ../terra-jupyter-base/ folder,
+# then all of the above could be replaced by this:
+#docker build -t terra-jupyter-minimal-base:0.0.3 -f Dockerfile ../terra-jupyter-base/
diff --git a/terra-jupyter-minimal-base/gcc_pkgs.txt b/terra-jupyter-minimal-base/gcc_pkgs.txt
new file mode 100644
index 00000000..74d41996
--- /dev/null
+++ b/terra-jupyter-minimal-base/gcc_pkgs.txt
@@ -0,0 +1,16 @@
+checkinstall
+build-essential
+zlib1g-dev
+libssl-dev
+libbz2-dev
+libreadline-dev
+libsqlite3-dev
+llvm
+libncurses5-dev
+libncursesw5-dev
+tk-dev
+libffi-dev
+liblzma-dev
+python-openssl
+libexempi3
+libv8-3.14-dev
\ No newline at end of file
diff --git a/terra-jupyter-minimal-base/requirements.txt b/terra-jupyter-minimal-base/requirements.txt
new file mode 100644
index 00000000..37862361
--- /dev/null
+++ b/terra-jupyter-minimal-base/requirements.txt
@@ -0,0 +1,10 @@
+notebook==6.5.5
+cookiecutter==2.3.0
+tornado==6.3.3
+python-datauri==2.1.0
+jupyter_contrib_nbextensions==0.7.0
+jupyter_nbextensions_configurator==0.6.3
+markupsafe==2.1.2
+nbconvert==7.2.9
+nbclassic<0.5
+cromshell==2.0.0
\ No newline at end of file
diff --git a/terra-jupyter-minimal-base/requirements_gcc.txt b/terra-jupyter-minimal-base/requirements_gcc.txt
new file mode 100644
index 00000000..a6905fc8 --- /dev/null +++ b/terra-jupyter-minimal-base/requirements_gcc.txt @@ -0,0 +1,2 @@ +terra-notebook-utils==0.13.0 +firecloud==0.16.35 \ No newline at end of file diff --git a/terra-jupyter-minimal-gpu-base/CHANGELOG.md b/terra-jupyter-minimal-gpu-base/CHANGELOG.md new file mode 100644 index 00000000..83d55274 --- /dev/null +++ b/terra-jupyter-minimal-gpu-base/CHANGELOG.md @@ -0,0 +1,41 @@ +## 0.0.3 - 11/08/2023 + +- Extends `nvcr.io/nvidia/cuda:12.2.2-base-ubuntu20.04` base image + - CUDA 12.2 +- OS prerequisites +- google-cloud-cli +- Python 3.10 +- Miniconda + - installs libmamba solver and makes it default ( + [see here](https://www.anaconda.com/blog/a-faster-conda-for-a-growing-community/)) +- Mamba +- Jupyter +- Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) + + +## 0.0.2 - 08/30/2023 + +- Extends `nvcr.io/nvidia/cuda:11.8.0-base-ubuntu20.04` base image + - CUDA 11.8 +- OS prerequisites +- google-cloud-cli +- Python 3.10 +- Miniconda +- Mamba +- Jupyter +- Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) + +## 0.0.1 - 08/30/2022 + +- Extends `nvcr.io/nvidia/cuda:11.3.1-base-ubuntu20.04` base image + - CUDA 11.3 +- Add google-cloud-cli +- Add Python 3.7 +- Add Miniconda +- Add Jupyter +- Add Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) + +Image URL: `us.gcr.io/broad-dsde-methods/terra-jupyter-minimal-gpu-base:0.0.1` diff --git a/terra-jupyter-minimal-gpu-base/README.md b/terra-jupyter-minimal-gpu-base/README.md new file mode 100644 index 00000000..6c6233fb --- /dev/null +++ b/terra-jupyter-minimal-gpu-base/README.md @@ -0,0 +1,50 @@ +# terra-jupyter-minimal-gpu-base image + +This repo contains the terra-jupyter-minimal-gpu-base image that is compatible with +the notebook service in [Terra](https://app.terra.bio/) called Leonardo.
+ +## Image contents + +`terra-jupyter-minimal-gpu-base` extends an image from Nvidia built on top of +Ubuntu 20.04 called `nvcr.io/nvidia/cuda:12.2.2-base-ubuntu20.04`, which has CUDA and +Nvidia drivers installed. This image adds the minimum +requirements necessary to set up Jupyter and provide compatibility with Leonardo. + +- CUDA 12.2 +- OS prerequisites +- google-cloud-cli +- Python 3.10 +- Miniconda +- Mamba +- Jupyter +- Leonardo customizations/extensions +- Terra python client library ([FISS](https://github.com/broadinstitute/fiss)) +- Full list of python packages is available [here](../terra-jupyter-minimal-base/requirements.txt) and + [here](../terra-jupyter-minimal-base/requirements_gcc.txt) + +To see the complete contents of this image please see the +[Dockerfile](../terra-jupyter-minimal-base/Dockerfile) (the same Dockerfile as +`terra-jupyter-minimal-base`), +which builds `FROM nvcr.io/nvidia/cuda:12.2.2-base-ubuntu20.04` +(the build command is [here](./build_docker.sh)). + +## Notes + +- The philosophy here is minimalism, so that this image can be built upon. +To that end, neither `pytorch` nor `tensorflow` is pre-installed here, +so they will need to be installed by images built on top of `terra-jupyter-minimal-gpu-base`. + +- Currently, the environment variable `WORKSPACE_BUCKET` is not getting set + correctly when this image is used to create a Cloud Environment in Terra + - The value is `'.'` + - This will be fixed in future versions + - For now, it's recommended to avoid using this environment variable + +## Selecting prior versions of this image + +To select an older version of this image, you can search the [CHANGELOG.md](./CHANGELOG.md) +for a specific package version you need. + +Once you find an image version that you want, simply copy and paste the image +url from the changelog into the corresponding custom docker field in the Terra +notebook runtime widget.
diff --git a/terra-jupyter-minimal-gpu-base/build_docker.sh b/terra-jupyter-minimal-gpu-base/build_docker.sh
new file mode 100755
index 00000000..4dd66bc1
--- /dev/null
+++ b/terra-jupyter-minimal-gpu-base/build_docker.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# Builds the terra-jupyter-minimal-gpu-base image from the terra-jupyter-minimal-base
+# Dockerfile, passing a CUDA image as the BASE_IMAGE build argument.
+#
+# custom/, scripts/ and jupyter_notebook_config.py are copied from ../terra-jupyter-base/
+# into the build context (../terra-jupyter-minimal-base/) and removed again on exit.
+
+set -euo pipefail
+
+# the relative paths below only resolve from this script's own directory
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+base="nvcr.io/nvidia/cuda:12.2.2-base-ubuntu20.04"
+context="../terra-jupyter-minimal-base"
+
+# remove the copied files; also runs (via trap) when a copy or the build fails
+cleanup() {
+    rm -rf "${context}/custom" "${context}/scripts" "${context}/jupyter_notebook_config.py"
+}
+trap cleanup EXIT
+
+# start clean: `cp -r` into a leftover custom/ or scripts/ would nest the copy inside it
+cleanup
+cp -r ../terra-jupyter-base/custom "${context}/custom"
+cp -r ../terra-jupyter-base/scripts "${context}/scripts"
+cp -r ../terra-jupyter-base/jupyter_notebook_config.py "${context}/jupyter_notebook_config.py"
+
+# under `set -e` a failed build is the script's exit status (previously masked by the trailing rm)
+docker build \
+  -t terra-jupyter-minimal-gpu-base:0.0.3 \
+  -f "${context}/Dockerfile" \
+  --build-arg BASE_IMAGE="${base}" \
+  "${context}/"