From a8a143d7df1a8f4eb1044587e51514056d64a317 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Wed, 19 Nov 2025 17:23:12 -0500
Subject: [PATCH 01/22] [AN-551] New terra base image
---
.../main/resources/init-resources/gce-init.sh | 65 ++++++++++---------
.../resources/init-resources/init-actions.sh | 29 +++++----
.../main/resources/init-resources/startup.sh | 8 +--
.../BaseCloudServiceRuntimeMonitor.scala | 4 +-
4 files changed, 54 insertions(+), 52 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 50196db598..12c67b143c 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -40,8 +40,9 @@ START_USER_SCRIPT_OUTPUT_URI=$(startUserScriptOutputUri)
IS_GCE_FORMATTED=$(isGceFormatted)
# Needs to be in sync with terra-docker container
JUPYTER_HOME=/etc/jupyter
-JUPYTER_SCRIPTS=$JUPYTER_HOME/scripts
-JUPYTER_USER_HOME=$(jupyterHomeDirectory)
+JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
+JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
+USER_HOME=$(jupyterHomeDirectory)
RSTUDIO_SCRIPTS=/etc/rstudio/scripts
SERVER_CRT=$(proxyServerCrt)
SERVER_KEY=$(proxyServerKey)
@@ -410,13 +411,11 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# Install everything after having mounted the empty PD
# This should not be needed anymore if the jupyter home is a directory of the PD mount point
# See: https://github.com/DataBiosphere/leonardo/pull/4465/files
- if [ ! "$JUPYTER_USER_HOME" = "/home/jupyter" ] ; then
+ if [ ! "$USER_HOME" = "/home/jupyter" ] ; then
# TODO: Remove once we stop supporting non AI notebooks based images
- log 'Installing Jupyter kernelspecs...(Remove once we stop supporting non AI notebooks based images)'
- KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
-
+ log 'Installing Jupyter kernelspecs'
# Install kernelspecs inside the Jupyter container
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel ${KERNELSPEC_HOME}
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel /usr/local/share/jupyter/kernels
fi
# Install notebook.json which is used to populate Jupyter.notebook.config in JavaScript extensions.
@@ -447,14 +446,14 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
fi
done
fi
@@ -468,9 +467,9 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
fi
done
fi
@@ -485,9 +484,9 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
fi
done
fi
@@ -503,14 +502,14 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp -r $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
fi
done
fi
@@ -524,24 +523,26 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
# For older jupyter images, jupyter_delocalize.py is using 127.0.0.1 as welder's url, which won't work now that we're no longer using `network_mode: host` for GCE VMs
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/127.0.0.1/welder/g' /etc/jupyter/custom/jupyter_delocalize.py"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/127.0.0.1/welder/g' $JUPYTER_EXTENSIONS/jupyter_delocalize.py"
+
+ log 'Wget the gitignore_global file, set gitignore in Git Config'
# Copy gitignore into jupyter container (ask AOU?)
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget -N https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget -N https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global \
+ && git config --global core.excludesfile $USER_HOME/gitignore_global"
+
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "whoami"
- # Install nbstripout and set gitignore in Git Config (ask AOU?)
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \
- && nbstripout --install --global \
- && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "ls -l $JUPYTER_EXTENSIONS"
# Starts the locking logic (used for AOU). google_sign_in.js is likely not used anymore
- docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
- && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
- && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
- && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
- && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
+ docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
+ && mkdir -p $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
+ && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
+ && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
+ && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
@@ -566,7 +567,7 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
STEP_TIMINGS+=($(date +%s))
log 'Starting Jupyter Notebook...'
- retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}"
+ retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
# done start Jupyter
STEP_TIMINGS+=($(date +%s))
@@ -639,4 +640,4 @@ log 'All done!'
ELAPSED_TIME=$(($END_TIME - $START_TIME))
log "gce-init.sh took $(display_time $ELAPSED_TIME)"
-log "Step timings: ${STEP_TIMINGS[@]}"
+log "Step timings: ${STEP_TIMINGS[@]}"
\ No newline at end of file
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index a48d0f2d99..08315b6059 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -159,7 +159,8 @@ if [[ "${ROLE}" == 'Master' ]]; then
bash add-google-cloud-ops-agent-repo.sh --also-install
JUPYTER_HOME=/etc/jupyter
- JUPYTER_SCRIPTS=${JUPYTER_HOME}/scripts
+ JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
+ JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
# Set variables
@@ -168,7 +169,7 @@ if [[ "${ROLE}" == 'Master' ]]; then
# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
export CLOUD_SERVICE='DATAPROC'
# Needs to be in sync with terra-docker container
- export JUPYTER_USER_HOME=$(jupyterHomeDirectory)
+ export USER_HOME=$(jupyterHomeDirectory)
export CLUSTER_NAME=$(clusterName)
export RUNTIME_NAME=$(clusterName)
export GOOGLE_PROJECT=$(googleProject)
@@ -360,7 +361,7 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_EXTENSIONS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
@@ -459,7 +460,7 @@ EOF
# jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
# A better to do this might be to take welder host as an argument to the script
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py"
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
# This is to make it so that older images will still work after we change notebooks location to home dir
@@ -471,20 +472,20 @@ EOF
# Install nbstripout and set gitignore in Git Config
docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \
&& python -m nbstripout --install --global \
- && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global"
+ && git config --global core.excludesfile $USER_HOME/gitignore_global"
# Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU)
- docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
- && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
- && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
- && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
- && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
+ docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
+ && mkdir -p $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
+ && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
+ && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
+ && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
log 'Starting Jupyter Notebook...'
- retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}"
+ retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
STEP_TIMINGS+=($(date +%s))
fi
@@ -533,4 +534,4 @@ SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron
fi
log 'All done!'
-log "Timings: ${STEP_TIMINGS[@]}"
+log "Timings: ${STEP_TIMINGS[@]}"
\ No newline at end of file
diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh
index 7e90819ac2..8ba25adcad 100644
--- a/http/src/main/resources/init-resources/startup.sh
+++ b/http/src/main/resources/init-resources/startup.sh
@@ -35,7 +35,7 @@ else
DOCKER_COMPOSE='docker-compose'
DOCKER_COMPOSE_FILES_DIRECTORY='/etc'
fi
-export JUPYTER_USER_HOME=$(jupyterHomeDirectory)
+export USER_HOME=$(jupyterHomeDirectory)
export RSTUDIO_USER_HOME=/home/rstudio
export GOOGLE_PROJECT=$(googleProject)
export CLUSTER_NAME=$(clusterName)
@@ -145,7 +145,7 @@ function validateCert() {
DATAPROC_IMAGES_TO_RESTART+=(-f /etc/jupyter-docker-compose.yaml )
fi
- if [ "${CLOUD_SERVICE}" == 'DATAPROC']
+ if [ "${CLOUD_SERVICE}" == 'DATAPROC' ]
then
${DOCKER_COMPOSE} "${DATAPROC_IMAGES_TO_RESTART[@]}" restart &> /var/start_output.txt || EXIT_CODE=$?
else
@@ -312,7 +312,7 @@ else
# jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
# A better to do this might be to take welder host as an argument to the script
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/extensions/jupyter_delocalize.py"
fi
fi
@@ -366,7 +366,7 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
# Start Jupyter server
- docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/scripts/run-jupyter.sh $NOTEBOOKS_DIR || /opt/conda/bin/jupyter notebook)"
+ docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/run-jupyter.sh $NOTEBOOKS_DIR || /etc/jupyter/bin/jupyter notebook)"
fi
# Configuring RStudio, if enabled
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala
index d6cc6e3a64..8a462ad22d 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala
@@ -112,7 +112,7 @@ abstract class BaseCloudServiceRuntimeMonitor[F[_]] {
runtimeAndRuntimeConfig: RuntimeAndRuntimeConfig,
errorDetails: RuntimeErrorDetails,
mainInstance: Option[DataprocInstance],
- deleteRuntime: Boolean = true
+ deleteRuntime: Boolean = false
)(implicit
ev: Ask[F, AppContext]
): F[CheckResult] =
@@ -590,7 +590,7 @@ abstract class BaseCloudServiceRuntimeMonitor[F[_]] {
Some("tool_start_up")
),
mainDataprocInstance,
- deleteRuntimeOnFail
+ false
)
}
} yield r
From 1f19e770ff4cbc72393156f335841d5b85c3373e Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 21 Nov 2025 11:11:33 -0500
Subject: [PATCH 02/22] move localization scripts to site-packages
---
http/src/main/resources/init-resources/gce-init.sh | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 12c67b143c..13c0313e6a 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -566,6 +566,11 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# done start user script
STEP_TIMINGS+=($(date +%s))
+ # Move jupyter_localize_extension and jupyter_delocalize to site-packages so they can be found by jupyter
+ RUN mv $JUPYTER_HOME/extensions/jupyter_delocalize.py $JUPYTER_HOME/lib/python3.10/site-packages
+ RUN mv $JUPYTER_HOME/extensions/jupyter_localize_extension.py $JUPYTER_HOME/lib/python3.10/site-packages
+
+
log 'Starting Jupyter Notebook...'
retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
From 7db9b52ecf5b0855e753effea70094d0c9755404 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 21 Nov 2025 11:37:24 -0500
Subject: [PATCH 03/22] wrong path
---
http/src/main/resources/init-resources/gce-init.sh | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 13c0313e6a..3120393d76 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -567,9 +567,8 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
STEP_TIMINGS+=($(date +%s))
# Move jupyter_localize_extension and jupyter_delocalize to site-packages so they can be found by jupyter
- RUN mv $JUPYTER_HOME/extensions/jupyter_delocalize.py $JUPYTER_HOME/lib/python3.10/site-packages
- RUN mv $JUPYTER_HOME/extensions/jupyter_localize_extension.py $JUPYTER_HOME/lib/python3.10/site-packages
-
+ RUN mv $JUPYTER_EXTENSIONS/jupyter_delocalize.py $JUPYTER_HOME/lib/python3.10/site-packages
+ RUN mv $JUPYTER_EXTENSIONS/jupyter_localize_extension.py $JUPYTER_HOME/lib/python3.10/site-packages
log 'Starting Jupyter Notebook...'
retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
From 2e3e0d52c59de5291bfe154a1ca7520ccbe9572f Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 21 Nov 2025 12:13:14 -0500
Subject: [PATCH 04/22] fix command
---
.../main/resources/init-resources/gce-init.sh | 4 ++--
.../resources/init-resources/init-actions.sh | 23 ++++++++++---------
2 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 3120393d76..d567bf9e51 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -567,8 +567,8 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
STEP_TIMINGS+=($(date +%s))
# Move jupyter_localize_extension and jupyter_delocalize to site-packages so they can be found by jupyter
- RUN mv $JUPYTER_EXTENSIONS/jupyter_delocalize.py $JUPYTER_HOME/lib/python3.10/site-packages
- RUN mv $JUPYTER_EXTENSIONS/jupyter_localize_extension.py $JUPYTER_HOME/lib/python3.10/site-packages
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_localize_extension.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
log 'Starting Jupyter Notebook...'
retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index 08315b6059..a51279730e 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -159,7 +159,7 @@ if [[ "${ROLE}" == 'Master' ]]; then
bash add-google-cloud-ops-agent-repo.sh --also-install
JUPYTER_HOME=/etc/jupyter
- JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
+# JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
@@ -276,6 +276,7 @@ EOF
# If any image is hosted in a GAR registry (detected by regex) then
# authorize docker to interact with gcr.io.
# NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect.
+ # TODO (LM) test with non-broad account
if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then
log 'Authorizing GCR/GAR...'
gcloud auth configure-docker
@@ -361,14 +362,14 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_EXTENSIONS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
fi
done
fi
@@ -384,9 +385,9 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
fi
done
fi
@@ -403,9 +404,9 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
fi
done
fi
@@ -439,14 +440,14 @@ EOF
gsutil cp -r $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
fi
done
fi
From c926ef7b1b7ff38f5d0f854351643aacc1b681ca Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 25 Nov 2025 15:21:21 -0500
Subject: [PATCH 05/22] updating paths
---
.../resources/init-resources/jupyter-docker-compose-gce.yaml | 4 ++--
.../main/resources/init-resources/jupyter-docker-compose.yaml | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
index 375e4209ee..d94f5eb763 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
@@ -33,8 +33,8 @@ services:
R_LIBS: "${NOTEBOOKS_DIR}/packages"
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2
# We should remove the two lines once we no longer support older images. In the meantime, we need to be careful updating Jupyter base images.
- PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
+ PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
- /var/custom_env_vars.env
# See https://docs.docker.com/engine/reference/run/#user-memory-constraints
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
index 34f538e706..1a8d7f8e88 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
@@ -51,8 +51,8 @@ services:
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2.
# We should remove the two lines once we no longer support older images.
# When we update base image in terra-docker next time, we should verify the paths are still valid
- PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${HOME}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
+ PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
- /var/custom_env_vars.env
# See https://docs.docker.com/engine/reference/run/#user-memory-constraints
From 8dedb32d499559fa550ed4424262a418e65630f2 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Mon, 1 Dec 2025 11:31:10 -0500
Subject: [PATCH 06/22] env var
---
http/src/main/resources/init-resources/init-actions.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index a51279730e..a411e8b49e 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -159,7 +159,7 @@ if [[ "${ROLE}" == 'Master' ]]; then
bash add-google-cloud-ops-agent-repo.sh --also-install
JUPYTER_HOME=/etc/jupyter
-# JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
+ JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
@@ -535,4 +535,4 @@ SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron
fi
log 'All done!'
-log "Timings: ${STEP_TIMINGS[@]}"
\ No newline at end of file
+log "Timings: ${STEP_TIMINGS[@]}"
From 7c091af130a168b81f6c0ee8c17d5cc12757560e Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 2 Dec 2025 16:31:08 -0500
Subject: [PATCH 07/22] add a new init-resources directory
---
.../test-crypto-detector-docker-compose.yaml | 1 +
.../test-google_plugin_jupyter.js | 1 +
.../test-google_plugin_jupyterlab.js | 1 +
.../test-google_sign_in.js | 2 ++
.../base-init-resources/test-init-actions.sh | 19 +++++++++++++++++++
.../test-install-jupyter-extension.sh | 3 +++
.../test-jupyter-docker-compose-gce.yaml | 1 +
.../test-jupyter-docker-compose.yaml | 1 +
.../test-proxy-docker-compose.yaml | 1 +
.../test-rstudio-docker-compose.yaml | 1 +
.../base-init-resources/test-site.conf | 1 +
.../test-welder-docker-compose.yaml | 1 +
.../config/ClusterResourcesConfig.scala | 3 ++-
.../leonardo/util/GceInterpreter.scala | 9 ++++++++-
14 files changed, 43 insertions(+), 2 deletions(-)
create mode 100644 http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js
create mode 100644 http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js
create mode 100644 http/src/main/resources/base-init-resources/test-google_sign_in.js
create mode 100644 http/src/main/resources/base-init-resources/test-init-actions.sh
create mode 100644 http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh
create mode 100644 http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml
create mode 100644 http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/test-site.conf
create mode 100644 http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml
diff --git a/http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml
new file mode 100644
index 0000000000..2e8fb9de5b
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml
@@ -0,0 +1 @@
+crypto
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js b/http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js
new file mode 100644
index 0000000000..63e50e2724
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js
@@ -0,0 +1 @@
+alert("Hello World!");
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js b/http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js
new file mode 100644
index 0000000000..b1af1b69e8
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js
@@ -0,0 +1 @@
+alert("Hello Lab!");
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-google_sign_in.js b/http/src/main/resources/base-init-resources/test-google_sign_in.js
new file mode 100644
index 0000000000..80d24390c4
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-google_sign_in.js
@@ -0,0 +1,2 @@
+$(userEmailLoginHint)
+$(defaultClientId)
diff --git a/http/src/main/resources/base-init-resources/test-init-actions.sh b/http/src/main/resources/base-init-resources/test-init-actions.sh
new file mode 100644
index 0000000000..c182cd90ea
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-init-actions.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+$(clusterName)
+$(googleProject)
+$(jupyterDockerImage)
+$(rstudioDockerImage)
+$(proxyDockerImage)
+$(jupyterUserScriptUri)
+$(jupyterStartUserScriptUri)
+$(jupyterServiceAccountCredentials)
+$(jupyterServerExtensions)
+$(jupyterNbExtensions)
+$(jupyterCombinedExtensions)
+$(jupyterUserScriptOutputUri)
+$(jupyterNotebookConfigUri)
+$(jupyterNotebookFrontendConfigUri)
+$(customEnvVarsConfigUri)
+$(memLimit)
+$(shmSize)
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh b/http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh
new file mode 100644
index 0000000000..283c8e8018
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+echo "Hello World"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml
new file mode 100644
index 0000000000..257cc5642c
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml
@@ -0,0 +1 @@
+foo
diff --git a/http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml
new file mode 100644
index 0000000000..1910281566
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml
@@ -0,0 +1 @@
+foo
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml
new file mode 100644
index 0000000000..18cd353694
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml
@@ -0,0 +1 @@
+proxy
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml
new file mode 100644
index 0000000000..ba7985a3fa
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml
@@ -0,0 +1 @@
+rstudio
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-site.conf b/http/src/main/resources/base-init-resources/test-site.conf
new file mode 100644
index 0000000000..9f26b637f0
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-site.conf
@@ -0,0 +1 @@
+Foo
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml
new file mode 100644
index 0000000000..d43a6afa23
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml
@@ -0,0 +1 @@
+welder
\ No newline at end of file
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala
index e3cffcb18c..9d64028883 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala
@@ -18,5 +18,6 @@ case class ClusterResourcesConfig(
)
object ClusterResourcesConfig {
- val basePath = "init-resources"
+ val path = "init-resources"
+ val basePath = "base-init-resources"
}
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
index a08489221d..2a2fcbeda3 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
@@ -17,6 +17,7 @@ import org.broadinstitute.dsde.workbench.google2.{
SubnetworkName,
ZoneName
}
+import org.broadinstitute.dsde.workbench.leonardo.RuntimeImageType.Jupyter
import org.broadinstitute.dsde.workbench.leonardo.config.ClusterResourcesConfig
import org.broadinstitute.dsde.workbench.leonardo.dao.WelderDAO
import org.broadinstitute.dsde.workbench.leonardo.dao.google._
@@ -111,8 +112,14 @@ class GceInterpreter[F[_]](
.fromOption(config.clusterResourcesConfig.cloudInit,
new LeoException("No cloud init file defined for GCE VM.", traceId = Some(ctx.traceId))
)
+ // if the user is using the new terra-base jupyter image, a different set of init scripts is needed
+ // the terra-base init scripts have some different paths etc and are within the folder 'base-init-resources'
+ initResourcesPath =
+ if (params.runtimeImages.exists(img => img.imageType == Jupyter && img.imageUrl.contains("terra-base")))
+ ClusterResourcesConfig.basePath
+ else ClusterResourcesConfig.path
cloudInitFileContent = scala.io.Source
- .fromResource(s"${ClusterResourcesConfig.basePath}/${cloudInit.asString}")
+ .fromResource(s"${initResourcesPath}/${cloudInit.asString}")
.getLines()
.toList
.mkString("\n")
From 18b545a13b7dfd612f3c4a4533b86dc49c053c54 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 2 Dec 2025 16:38:21 -0500
Subject: [PATCH 08/22] fix paths
---
.../resources/base-init-resources/README.md | 23 +
.../base-init-resources/cloud-init.yml | 12 +
.../base-init-resources/cluster-site-gce.conf | 79 +++
.../base-init-resources/cluster-site.conf | 100 +++
.../resources/base-init-resources/gce-init.sh | 647 ++++++++++++++++++
.../gpu-docker-compose.yaml | 15 +
.../base-init-resources/init-actions.sh | 538 +++++++++++++++
.../jupyter-docker-compose-gce.yaml | 46 ++
.../jupyter-docker-compose.yaml | 61 ++
.../base-init-resources/notebook.json | 7 +
.../proxy-docker-compose-gce.yaml | 21 +
.../proxy-docker-compose.yaml | 19 +
.../rstudio-docker-compose-gce.yaml | 35 +
.../rstudio-docker-compose.yaml | 28 +
.../resources/base-init-resources/shutdown.sh | 40 ++
.../resources/base-init-resources/startup.sh | 399 +++++++++++
.../test-crypto-detector-docker-compose.yaml | 1 -
.../test-google_plugin_jupyter.js | 1 -
.../test-google_plugin_jupyterlab.js | 1 -
.../test-google_sign_in.js | 2 -
.../base-init-resources/test-init-actions.sh | 19 -
.../test-install-jupyter-extension.sh | 3 -
.../test-jupyter-docker-compose-gce.yaml | 1 -
.../test-jupyter-docker-compose.yaml | 1 -
.../test-proxy-docker-compose.yaml | 1 -
.../test-rstudio-docker-compose.yaml | 1 -
.../base-init-resources/test-site.conf | 1 -
.../test-welder-docker-compose.yaml | 1 -
.../welder-docker-compose-gce.yaml | 44 ++
.../welder-docker-compose.yaml | 38 +
30 files changed, 2152 insertions(+), 33 deletions(-)
create mode 100644 http/src/main/resources/base-init-resources/README.md
create mode 100644 http/src/main/resources/base-init-resources/cloud-init.yml
create mode 100755 http/src/main/resources/base-init-resources/cluster-site-gce.conf
create mode 100755 http/src/main/resources/base-init-resources/cluster-site.conf
create mode 100644 http/src/main/resources/base-init-resources/gce-init.sh
create mode 100644 http/src/main/resources/base-init-resources/gpu-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/init-actions.sh
create mode 100644 http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml
create mode 100644 http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/notebook.json
create mode 100644 http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml
create mode 100644 http/src/main/resources/base-init-resources/proxy-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml
create mode 100644 http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/shutdown.sh
create mode 100644 http/src/main/resources/base-init-resources/startup.sh
delete mode 100644 http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js
delete mode 100644 http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js
delete mode 100644 http/src/main/resources/base-init-resources/test-google_sign_in.js
delete mode 100644 http/src/main/resources/base-init-resources/test-init-actions.sh
delete mode 100644 http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh
delete mode 100644 http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml
delete mode 100644 http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/test-site.conf
delete mode 100644 http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml
create mode 100644 http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml
create mode 100644 http/src/main/resources/base-init-resources/welder-docker-compose.yaml
diff --git a/http/src/main/resources/base-init-resources/README.md b/http/src/main/resources/base-init-resources/README.md
new file mode 100644
index 0000000000..cc49061356
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/README.md
@@ -0,0 +1,23 @@
+# Jupyter nbextension development guide
+
+## Running locally
+To run plugins off local js files:
+1. [Install jupyter](https://jupyter.org/install)
+2. Run this command with the arguments to ensure the extension updates when you make changes to the .js files: `jupyter nbextension install /[absolute path to leo repo]/leonardo/src/main/resources/jupyter/ --symlink`
+3. To test edit-mode or safe-mode extensions, you must update the file to use local urls. For edit-mode, you can find a section of 4 variables near the top labelled `URLS for local testing` and a section labelled `URLS for leo deployment` above it. You can comment out the `URLS for leo deployment` and uncomment `URLS for local testing` (TODO: find a better way to do this)
+4. Run this for each extension in the jupyter/ dir you want enabled: ```jupyter nbextension enable jupyter/[File name WITHOUT EXTENSION]``` e.g., `jupyter nbextension enable jupyter/edit-mode`
+5. Run `jupyter notebook`. It should open the jupyter server in the browser window. You can verify the appropriate extension loaded by opening the developer console and going to the `Sources` tab. On the file explorer on the left, you should find a folder called `nbextensions` containing the loaded extensions, possibly in `nbextensions -> jupyter`. Here you can place breakpoints to test functionality.
+
+## Misc Info
+
+Look at the jupyter_notebook_config and ensure your local config emulates what the settings are found in this file (of interest are port number and cors/auth settings)
+
+At the time of writing, there are 3 nbextensions, edit-mode.js, safe-mode.js, and google_sign_in.js
+
+extension_entry.js controls which plugins are loaded into the jupyter server image
+
+POST storageLinks/:
+`curl -vX POST --header 'Content-Type: application/json' --header 'Accept: application/json' [welderUrl]/storageLinks -d '{"localBaseDirectory": "[local dir relative to dir in welder conf, ex 'edit']", "localSafeModeBaseDirectory": "[local dir relative to dir in welder conf, ex 'safe']", "cloudStorageDirectory": "gs://jc-sample-bucket", "pattern": "*" }'`
+
+POST localize/:
+`curl -vX POST --header 'Content-Type: application/json' --header 'Accept: application/json' localhost:8081/objects -d '{"action" : "localize", "entries": [{ "sourceUri": "gs://jc-sample-bucket/Untitled.ipynb", "localDestinationPath": "edit/Untitled.ipynb" }] }'`
diff --git a/http/src/main/resources/base-init-resources/cloud-init.yml b/http/src/main/resources/base-init-resources/cloud-init.yml
new file mode 100644
index 0000000000..46973abfc5
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/cloud-init.yml
@@ -0,0 +1,12 @@
+#cloud-config
+
+write_files:
+ - path: /etc/systemd/system/google-shutdown-scripts.service.d/override.conf
+ permissions: 0644
+ owner: root
+ content: |
+ [Unit]
+ After=docker.service
+
+runcmd:
+ - systemctl daemon-reload
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/cluster-site-gce.conf b/http/src/main/resources/base-init-resources/cluster-site-gce.conf
new file mode 100755
index 0000000000..9f4c4dbf8e
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/cluster-site-gce.conf
@@ -0,0 +1,79 @@
+
+
+ SSLEngine on
+ SSLProxyEngine on
+ SSLCertificateFile "/etc/ssl/certs/server.crt"
+ SSLCertificateKeyFile "/etc/ssl/private/server.key"
+ SSLCACertificateFile "/etc/ssl/certs/ca-bundle.crt"
+
+ SSLVerifyClient require
+ SSLVerifyDepth 10
+
+ ServerName ${PROXY_SERVER_HOST_NAME}
+ UseCanonicalName on
+ ProxyRequests off
+
+ RewriteEngine on
+
+ ################
+ # RStudio
+ ################
+ RewriteCond %{HTTP:Upgrade} =websocket
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) ws://rstudio:8001/$1 [P,L]
+
+ RewriteCond %{HTTP:Upgrade} !=websocket
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) http://rstudio:8001/$1 [P,L]
+
+ # Include a ProxyPassReverse so redirects by RStudio go to the correct server name (e.g. https://notebooks.firecloud.org)
+ # Need to include both http and https, as RStudio redirects to https in some cases.
+ ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ http://rstudio:8001/
+ ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ https://rstudio:8001/
+
+ # Append SameSite=None to cookies set by RStudio. This is required by some browsers because we
+ # render RStudio in an iframe. There does not appear to be a way within RStudio to do this, hence
+ # doing it in the proxy.
+ # [IA-4997] to support CHIPS by setting partitioned cookies
+ # Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly;Partitioned "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
+ Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
+
+ ####################
+ # Welder
+ ####################
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/welder/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/welder/(.*) http://welder:8080/$1 [P,L]
+
+ #####################################
+ # Jupyter (legacy /notebooks path)
+ #####################################
+
+ RewriteCond %{HTTP:Upgrade} =websocket
+ RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
+ RewriteRule .* ws://jupyter:8000%{REQUEST_URI} [P,L]
+
+ RewriteCond %{HTTP:Upgrade} !=websocket
+ RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
+ RewriteRule .* http://jupyter:8000%{REQUEST_URI} [P,L]
+
+ # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
+
+ ################################
+ # Jupyter (newer /proxy path)
+ ################################
+
+ # This needs to be coordinated with a change in jupyter_notebooks_config.py
+ # which is why we haven't yet enabled this.
+
+ # RewriteCond %{HTTP:Upgrade} =websocket
+ # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
+ # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) ws://127.0.0.1:8000/$1 [P,L]
+
+ # RewriteCond %{HTTP:Upgrade} !=websocket
+ # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
+ # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) http://127.0.0.1:8000/$1 [P,L]
+
+ # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
+
+
diff --git a/http/src/main/resources/base-init-resources/cluster-site.conf b/http/src/main/resources/base-init-resources/cluster-site.conf
new file mode 100755
index 0000000000..46245b75e8
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/cluster-site.conf
@@ -0,0 +1,100 @@
+
+
+ SSLEngine on
+ SSLProxyEngine on
+ SSLCertificateFile "/etc/ssl/certs/server.crt"
+ SSLCertificateKeyFile "/etc/ssl/private/server.key"
+ SSLCACertificateFile "/etc/ssl/certs/ca-bundle.crt"
+
+ SSLVerifyClient require
+ SSLVerifyDepth 10
+
+ ServerName ${PROXY_SERVER_HOST_NAME}
+ UseCanonicalName on
+ ProxyRequests off
+
+ RewriteEngine on
+
+ ################
+ # Spark Web UIs
+ ################
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/gateway/.* [NC]
+ RewriteRule .* http://127.0.0.1:8443%{REQUEST_URI} [P,L]
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/yarn/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/yarn/(.*) http://127.0.0.1:8443/yarn/$1 [P,L]
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jobhistory/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/jobhistory/(.*) http://127.0.0.1:8443/jobhistory/$1 [P,L]
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/apphistory/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/apphistory/(.*) http://127.0.0.1:8443/apphistory/$1 [P,L]
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/sparkhistory/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/sparkhistory/(.*) http://127.0.0.1:8443/sparkhistory/$1 [P,L]
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/hdfs/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/hdfs/(.*) http://127.0.0.1:8443/hdfs/$1 [P,L]
+
+ ################
+ # RStudio
+ ################
+ RewriteCond %{HTTP:Upgrade} =websocket
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) ws://127.0.0.1:8001/$1 [P,L]
+
+ RewriteCond %{HTTP:Upgrade} !=websocket
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) http://127.0.0.1:8001/$1 [P,L]
+
+ # Include a ProxyPassReverse so redirects by RStudio go to the correct server name (e.g. https://notebooks.firecloud.org)
+ # Need to include both http and https, as RStudio redirects to https in some cases.
+ ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ http://127.0.0.1:8001/
+ ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ https://127.0.0.1:8001/
+
+ # Append SameSite=None to cookies set by RStudio. This is required by some browsers because we
+ # render RStudio in an iframe. There does not appear to be a way within RStudio to do this, hence
+ # doing it in the proxy.
+ # [IA-4997] to support CHIPS by setting partitioned cookies
+ # Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly;Partitioned "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
+ Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
+
+ ####################
+ # Welder
+ ####################
+
+ RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/welder/.* [NC]
+ RewriteRule /proxy/[^/]*/[^/]*/welder/(.*) http://127.0.0.1:8080/$1 [P,L]
+
+ #####################################
+ # Jupyter (legacy /notebooks path)
+ #####################################
+
+ RewriteCond %{HTTP:Upgrade} =websocket
+ RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
+ RewriteRule .* ws://127.0.0.1:8000%{REQUEST_URI} [P,L]
+
+ RewriteCond %{HTTP:Upgrade} !=websocket
+ RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
+ RewriteRule .* http://127.0.0.1:8000%{REQUEST_URI} [P,L]
+
+ # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
+
+ ################################
+ # Jupyter (newer /proxy path)
+ ################################
+
+ # This needs to be coordinated with a change in jupyter_notebooks_config.py
+ # which is why we haven't yet enabled this.
+
+ # RewriteCond %{HTTP:Upgrade} =websocket
+ # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
+ # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) ws://127.0.0.1:8000/$1 [P,L]
+
+ # RewriteCond %{HTTP:Upgrade} !=websocket
+ # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
+ # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) http://127.0.0.1:8000/$1 [P,L]
+
+ # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
+
+
diff --git a/http/src/main/resources/base-init-resources/gce-init.sh b/http/src/main/resources/base-init-resources/gce-init.sh
new file mode 100644
index 0000000000..d567bf9e51
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/gce-init.sh
@@ -0,0 +1,647 @@
+#!/usr/bin/env bash
+
+# Borrowed from init-actions.sh as our GCE offering came after the dataproc cluster one.
+# This init script instantiates the tool (e.g. Jupyter) docker images on Google Compute Engine instances created by Leo.
+
+set -e -x
+
+# Set variables
+# Values like $(..) are populated by Leo when a cluster is created.
+# See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
+# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
+export CLOUD_SERVICE='GCE'
+export CLUSTER_NAME=$(clusterName)
+export RUNTIME_NAME=$(clusterName)
+export GOOGLE_PROJECT=$(googleProject)
+export STAGING_BUCKET=$(stagingBucketName)
+export OWNER_EMAIL=$(loginHint)
+export PET_SA_EMAIL=$(petSaEmail)
+export JUPYTER_SERVER_NAME=$(jupyterServerName)
+export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage)
+export WELDER_SERVER_NAME=$(welderServerName)
+export WELDER_DOCKER_IMAGE=$(welderDockerImage)
+export RSTUDIO_SERVER_NAME=$(rstudioServerName)
+export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
+export RSTUDIO_USER_HOME=/home/rstudio
+export PROXY_SERVER_NAME=$(proxyServerName)
+export PROXY_DOCKER_IMAGE=$(proxyDockerImage)
+export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName)
+export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage)
+export MEM_LIMIT=$(memLimit)
+export SHM_SIZE=$(shmSize)
+export WELDER_MEM_LIMIT=$(welderMemLimit)
+export PROXY_SERVER_HOST_NAME=$(proxyServerHostName)
+export WELDER_ENABLED=$(welderEnabled)
+export NOTEBOOKS_DIR=$(notebooksDir)
+
+START_USER_SCRIPT_URI=$(startUserScriptUri)
+# Include a timestamp suffix to differentiate different startup logs across restarts.
+START_USER_SCRIPT_OUTPUT_URI=$(startUserScriptOutputUri)
+IS_GCE_FORMATTED=$(isGceFormatted)
+# Needs to be in sync with terra-docker container
+JUPYTER_HOME=/etc/jupyter
+JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
+JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
+USER_HOME=$(jupyterHomeDirectory)
+RSTUDIO_SCRIPTS=/etc/rstudio/scripts
+SERVER_CRT=$(proxyServerCrt)
+SERVER_KEY=$(proxyServerKey)
+ROOT_CA=$(rootCaPem)
+JUPYTER_DOCKER_COMPOSE_GCE=$(jupyterDockerCompose)
+RSTUDIO_DOCKER_COMPOSE=$(rstudioDockerCompose)
+PROXY_DOCKER_COMPOSE=$(proxyDockerCompose)
+WELDER_DOCKER_COMPOSE=$(welderDockerCompose)
+GPU_DOCKER_COMPOSE=$(gpuDockerCompose)
+PROXY_SITE_CONF=$(proxySiteConf)
+JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions)
+JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions)
+JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions)
+JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions)
+USER_SCRIPT_URI=$(userScriptUri)
+USER_SCRIPT_OUTPUT_URI=$(userScriptOutputUri)
+JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri)
+CUSTOM_ENV_VARS_CONFIG_URI=$(customEnvVarsConfigUri)
+GPU_ENABLED=$(gpuEnabled)
+INIT_BUCKET_NAME=$(initBucketName)
+
+CERT_DIRECTORY='/var/certs'
+DOCKER_COMPOSE_FILES_DIRECTORY='/var/docker-compose-files'
+WORK_DIRECTORY='/mnt/disks/work'
+# Toolbox is specific to COS images and is needed to access functionalities like gcloud
+# See https://cloud.google.com/container-optimized-os/docs/how-to/toolbox
+GSUTIL_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gsutil'
+GCLOUD_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gcloud'
+
+# Welder configuration: RStudio files are saved every X seconds in the background, but Jupyter notebooks are not
+if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ export SHOULD_BACKGROUND_SYNC="true"
+else
+ export SHOULD_BACKGROUND_SYNC="false"
+fi
+
+#####################################################################################################
+# Functions
+#####################################################################################################
+
+# Retry a command up to a specific number of times until it exits successfully,
+# with exponential back off. For example:
+#
+# $ retry 5 echo "Hello"
+# Hello
+#
+# $ retry 5 false
+# Retry 1/5 exited 1, retrying in 2 seconds...
+# Retry 2/5 exited 1, retrying in 4 seconds...
+# Retry 3/5 exited 1, retrying in 8 seconds...
+# Retry 4/5 exited 1, retrying in 16 seconds...
+# Retry 5/5 exited 1, no more retries left.
+function retry {
+ local retries=$1
+ shift
+
+ for ((i = 1; i <= $retries; i++)); do
+ # run with an 'or' so set -e doesn't abort the bash script on errors
+ exit=0
+ "$@" || exit=$?
+ if [ $exit -eq 0 ]; then
+ return 0
+ fi
+ wait=$((2 ** $i))
+ if [ $i -eq $retries ]; then
+ log "Retry $i/$retries exited $exit, no more retries left."
+ break
+ fi
+ log "Retry $i/$retries exited $exit, retrying in $wait seconds..."
+ sleep $wait
+ done
+ return 1
+}
+
+function log() {
+ echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@"
+}
+
+display_time() {
+ local T=$1
+ local D=$((T/60/60/24))
+ local H=$((T/60/60%24))
+ local M=$((T/60%60))
+ local S=$((T%60))
+ (( $D > 0 )) && printf '%d days ' $D
+ (( $H > 0 )) && printf '%d hours ' $H
+ (( $M > 0 )) && printf '%d minutes ' $M
+ (( $D > 0 || $H > 0 || $M > 0 )) && printf 'and '
+ printf '%d seconds\n' $S
+}
+
+function apply_user_script() {
+ # User script to be executed once at creation time, but will not persist when the runtime is paused / resumed
+ local CONTAINER_NAME=$1
+ local TARGET_DIR=$2
+
+ log "Running user script $USER_SCRIPT_URI in $CONTAINER_NAME container..."
+ USER_SCRIPT=`basename ${USER_SCRIPT_URI}`
+ if [[ "$USER_SCRIPT_URI" == 'gs://'* ]]; then
+ $GSUTIL_CMD cp ${USER_SCRIPT_URI} /var &> /var/user_script_copy_output.txt
+ else
+ curl "${USER_SCRIPT_URI}" -o /var/"${USER_SCRIPT}"
+ fi
+ docker cp /var/"${USER_SCRIPT}" ${CONTAINER_NAME}:${TARGET_DIR}/"${USER_SCRIPT}"
+ # Note that we are running as root
+ retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/"${USER_SCRIPT}"
+
+ # Execute the user script as privileged to allow for deeper customization of VM behavior, e.g. installing
+ # network egress throttling. As docker is not a security layer, it is assumed that a determined attacker
+ # can gain full access to the VM already, so using this flag is not a significant escalation.
+ EXIT_CODE=0
+ docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/"${USER_SCRIPT}" &> /var/us_output.txt || EXIT_CODE=$?
+
+ # Should dump error in staging bucket so we can display that back as part of the error message
+ if [ $EXIT_CODE -ne 0 ]; then
+ log "User script failed with exit code $EXIT_CODE. Output is saved to $USER_SCRIPT_OUTPUT_URI."
+ retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"false" cp /var/us_output.txt ${USER_SCRIPT_OUTPUT_URI}
+ exit $EXIT_CODE
+ else
+ retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"true" cp /var/us_output.txt ${USER_SCRIPT_OUTPUT_URI}
+ fi
+}
+
+function apply_start_user_script() {
+  # User script to be executed at each startup time so the changes will persist between pause/resume cycles.
+  # Only used by the AOU Workbench, not Terra yet.
+  # See https://broadworkbench.atlassian.net/browse/IA-5054
+  local CONTAINER_NAME=$1  # docker container to exec into
+  local TARGET_DIR=$2      # directory inside the container where the script is copied and executed
+
+  log "Running start user script $START_USER_SCRIPT_URI in $CONTAINER_NAME container..."
+  START_USER_SCRIPT=$(basename "${START_USER_SCRIPT_URI}")
+  if [[ "$START_USER_SCRIPT_URI" == 'gs://'* ]]; then
+    $GSUTIL_CMD cp "${START_USER_SCRIPT_URI}" /var
+  else
+    curl -f "${START_USER_SCRIPT_URI}" -o /var/"${START_USER_SCRIPT}"  # -f: fail on HTTP errors instead of saving an error page and executing it below
+  fi
+  docker cp /var/"${START_USER_SCRIPT}" "${CONTAINER_NAME}":"${TARGET_DIR}"/"${START_USER_SCRIPT}"
+  retry 3 docker exec -u root "${CONTAINER_NAME}" chmod +x "${TARGET_DIR}"/"${START_USER_SCRIPT}"
+
+  # Keep in sync with startup.sh
+  EXIT_CODE=0
+  docker exec --privileged -u root -e PIP_USER=false "${CONTAINER_NAME}" "${TARGET_DIR}"/"${START_USER_SCRIPT}" &> /var/start_output.txt || EXIT_CODE=$?
+  if [ $EXIT_CODE -ne 0 ]; then
+    log "User start script failed with exit code ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"  # log (not echo) for consistency with apply_user_script
+    retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"false" cp /var/start_output.txt "${START_USER_SCRIPT_OUTPUT_URI}"
+    exit $EXIT_CODE
+  else
+    retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"true" cp /var/start_output.txt "${START_USER_SCRIPT_OUTPUT_URI}"
+  fi
+}
+
+#####################################################################################################
+# Main starts here.
+#####################################################################################################
+
+log "Running GCE VM init script..."
+
+# Array for instrumentation
+# UPDATE THIS IF YOU ADD MORE STEPS:
+# currently the steps are:
+# START init,
+# .. after persistent disk setup
+# .. after copying files from the GCS init bucket
+# .. after starting google-fluentd
+# .. after docker compose
+# .. after welder start
+# .. after extension install
+# .. after user script
+# .. after start user script
+# .. after start Jupyter
+# END
+
+## Used for profiling
+START_TIME=$(date +%s)
+STEP_TIMINGS=($(date +%s))
+
+
+DOCKER_COMPOSE="docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var docker/compose:1.29.2"
+
+mkdir -p ${WORK_DIRECTORY}
+mkdir -p ${CERT_DIRECTORY}
+mkdir -p ${DOCKER_COMPOSE_FILES_DIRECTORY}
+
+log 'Formatting and mounting persistent disk...'
+
+# Format and mount persistent disk
+## The PD should be the only `sd` disk that is not mounted yet
+AllsdDisks=($(lsblk --nodeps --noheadings --output NAME --paths | grep -i "sd"))
+FreesdDisks=()
+for Disk in "${AllsdDisks[@]}"; do
+ Mounts="$(lsblk -no MOUNTPOINT "${Disk}")"
+ if [ -z "$Mounts" ]; then
+ echo "Found our unmounted persistent disk!"
+ FreesdDisks="${Disk}"
+ else
+ echo "Not our persistent disk!"
+ fi
+done
+DISK_DEVICE_ID=${FreesdDisks}
+
+## Only format the disk if it hasn't already been formatted
+if [ "$IS_GCE_FORMATTED" == "false" ] ; then
+ # It's likely that the persistent disk was previously mounted on another VM and wasn't properly unmounted
+ # either because the VM was terminated or there is no unmount in the shutdown sequence and occasionally
+ # fs is getting marked as not clean.
+ # Passing -F -F to mkfs.ext4 should force the tool to ignore the state of the partition.
+  # Note that the command-line switch must appear twice (-F -F) to override this check
+
+ mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard ${DISK_DEVICE_ID} -F -F
+fi
+
+mount -t ext4 -O discard,defaults ${DISK_DEVICE_ID} ${WORK_DIRECTORY}
+
+# done persistent disk setup
+STEP_TIMINGS+=($(date +%s))
+
+# Enable GPU drivers on top of the base Google DeepLearning default image
+if [ "${GPU_ENABLED}" == "true" ] ; then
+ log 'Installing GPU driver...'
+ version="535.154.05"
+ isAvailable=$(cos-extensions list|grep $version)
+ if [[ -z "$isAvailable" ]]; then
+ # Install default version on the COS image
+ cos-extensions install gpu
+ else
+ cos-extensions install gpu -- --version $version
+ fi
+ mount --bind /var/lib/nvidia /var/lib/nvidia
+ mount -o remount,exec /var/lib/nvidia
+
+ $GSUTIL_CMD cp ${GPU_DOCKER_COMPOSE} ${DOCKER_COMPOSE_FILES_DIRECTORY}
+fi
+
+log 'Copying secrets from GCS...'
+
+# Add the certificates from the bucket to the VM. They are used by the docker-compose file
+$GSUTIL_CMD cp ${SERVER_CRT} ${CERT_DIRECTORY}
+$GSUTIL_CMD cp ${SERVER_KEY} ${CERT_DIRECTORY}
+$GSUTIL_CMD cp ${ROOT_CA} ${CERT_DIRECTORY}
+$GSUTIL_CMD cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY}
+
+
+# Install env var config (e.g. AOU / Terra use it to inject workspace name)
+# e.g. {
+ # "WORKSPACE_NAME": "CARJune24",
+ # "WORKSPACE_NAMESPACE": "callisto-dev",
+ # "WORKSPACE_BUCKET": "gs://fc-09516ff0-136e-4874-8484-1be0afa267a6",
+ # "GOOGLE_PROJECT": "terra-dev-e67d9572",
+ # "CUSTOM_IMAGE": "false",
+ # "DRS_RESOLVER_ENDPOINT": "api/v4/drs/resolve",
+ # "TERRA_DEPLOYMENT_ENV": "dev"
+ #}
+if [ ! -z "$CUSTOM_ENV_VARS_CONFIG_URI" ] ; then
+ log 'Copy custom env vars config...'
+ $GSUTIL_CMD cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var
+fi
+
+# done GCS copy
+STEP_TIMINGS+=($(date +%s))
+
+log 'Starting up the Jupyter...'
+
+# Run docker-compose for each specified compose file.
+# Note the `docker-compose pull` is retried to avoid intermittent network errors, but
+# `docker-compose up` is not retried since if that fails, something is probably broken
+# and wouldn't be remedied by retrying
+COMPOSE_FILES=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_DOCKER_COMPOSE}`)
+cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_DOCKER_COMPOSE}`
+if [ ! -z "$WELDER_DOCKER_IMAGE" ] && [ "$WELDER_ENABLED" == "true" ] ; then
+ COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${WELDER_DOCKER_COMPOSE}`)
+ cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${WELDER_DOCKER_COMPOSE}`
+fi
+
+if [ "${GPU_ENABLED}" == "true" ] ; then
+ COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`)
+ if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ # Little bit of hack to switch the jupyter paths to the rstudio ones. Should have separate docker gpu compose of rJupyter and Rstudio instead
+ sed -i 's/jupyter/rstudio/g' ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`
+ sed -i 's#${NOTEBOOKS_DIR}#/home/rstudio#g' ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`
+ fi
+ cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`
+fi
+
+if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
+ TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME}
+ COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${JUPYTER_DOCKER_COMPOSE_GCE}`)
+ cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${JUPYTER_DOCKER_COMPOSE_GCE}`
+fi
+
+if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME}
+ COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${RSTUDIO_DOCKER_COMPOSE}`)
+ cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${RSTUDIO_DOCKER_COMPOSE}`
+fi
+
+tee /var/variables.env << END
+CERT_DIRECTORY=${CERT_DIRECTORY}
+WORK_DIRECTORY=${WORK_DIRECTORY}
+PROXY_SERVER_NAME=${PROXY_SERVER_NAME}
+PROXY_DOCKER_IMAGE=${PROXY_DOCKER_IMAGE}
+GOOGLE_PROJECT=${GOOGLE_PROJECT}
+RUNTIME_NAME=${RUNTIME_NAME}
+PROXY_SERVER_HOST_NAME=${PROXY_SERVER_HOST_NAME}
+JUPYTER_SERVER_NAME=${JUPYTER_SERVER_NAME}
+JUPYTER_DOCKER_IMAGE=${JUPYTER_DOCKER_IMAGE}
+NOTEBOOKS_DIR=${NOTEBOOKS_DIR}
+OWNER_EMAIL=${OWNER_EMAIL}
+PET_SA_EMAIL=${PET_SA_EMAIL}
+WELDER_ENABLED=${WELDER_ENABLED}
+MEM_LIMIT=${MEM_LIMIT}
+SHM_SIZE=${SHM_SIZE}
+WELDER_SERVER_NAME=${WELDER_SERVER_NAME}
+WELDER_DOCKER_IMAGE=${WELDER_DOCKER_IMAGE}
+STAGING_BUCKET=${STAGING_BUCKET}
+WELDER_MEM_LIMIT=${WELDER_MEM_LIMIT}
+JUPYTER_SCRIPTS=${JUPYTER_SCRIPTS}
+HOST_PROXY_SITE_CONF_FILE_PATH=${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_SITE_CONF}`
+DOCKER_COMPOSE_FILES_DIRECTORY=${DOCKER_COMPOSE_FILES_DIRECTORY}
+RSTUDIO_SERVER_NAME=${RSTUDIO_SERVER_NAME}
+RSTUDIO_DOCKER_IMAGE=${RSTUDIO_DOCKER_IMAGE}
+SHOULD_BACKGROUND_SYNC=${SHOULD_BACKGROUND_SYNC}
+RSTUDIO_USER_HOME=${RSTUDIO_USER_HOME}
+END
+
+# Create a network that allows containers to talk to each other via exposed ports
+docker network create -d bridge app_network
+
+# Dumps the rendered yaml to the init script log.
+${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" config
+
+# Docker Pull
+log 'Pulling docker images...'
+if ! retry 5 ${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" pull &> /var/docker_pull_output.txt; then
+ # if coming from a private repo on GCR, need to use credentials supplied in cryptopants/docker-compose-gcr
+ # (see https://hub.docker.com/r/cryptopants/docker-compose-gcr)
+ log 'Docker pull failed. Private image, trying with cryptopants/docker-compose-gcr...'
+ DOCKER_COMPOSE="docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var -w=/var cryptopants/docker-compose-gcr"
+ retry 5 ${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" pull &> /var/docker_pull_output.txt;
+fi
+
+# This needs to happen before we start up containers because the jupyter user needs to be the owner of the PD
+chmod a+rwx ${WORK_DIRECTORY}
+
+# Docker compose up, starting all of the containers
+${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" up -d
+
+# Start up crypto detector, if enabled.
+# This should be started after other containers.
+# Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network.
+# See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2
+if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then
+ docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \
+ --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE}
+fi
+
+# done welder start
+STEP_TIMINGS+=($(date +%s))
+
+# Jupyter-specific setup, only do if Jupyter is installed
+if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
+ # user package installation directory
+ mkdir -p ${WORK_DIRECTORY}/packages
+ chmod a+rwx ${WORK_DIRECTORY}/packages
+
+ # Install everything after having mounted the empty PD
+ # This should not be needed anymore if the jupyter home is a directory of the PD mount point
+ # See: https://github.com/DataBiosphere/leonardo/pull/4465/files
+  if [ ! "$USER_HOME" = "/home/jupyter" ] ; then
+    # TODO: Remove once we stop supporting non AI notebooks based images
+    log 'Installing Jupyter kernelspecs'
+    # Install kernelspecs inside the Jupyter container (KERNELSPEC_HOME is no longer defined in this script; pass the default kernelspec path explicitly)
+    retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel /usr/local/share/jupyter/kernels
+  fi
+
+ # Install notebook.json which is used to populate Jupyter.notebook.config in JavaScript extensions.
+ # This is used in the edit-mode.js extension that Terra/AoU use.
+ if [ ! -z "$JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI" ] ; then
+ log 'Copy Jupyter frontend notebook config...'
+ $GSUTIL_CMD cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /var
+ JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
+ docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
+ fi
+
+ # Install NbExtensions. These are user-specified Jupyter extensions.
+ # For instance Terra UI is passing
+ # {
+ # "nbExtensions": {
+ # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js"
+ # },
+ # "labExtensions": {},
+ # "serverExtensions": {},
+ # "combinedExtensions": {}
+# }
+ if [ ! -z "$JUPYTER_NB_EXTENSIONS" ] ; then
+ for ext in ${JUPYTER_NB_EXTENSIONS}
+ do
+ log "Installing Jupyter NB extension [$ext]..."
+ if [[ $ext == 'gs://'* ]]; then
+ $GSUTIL_CMD cp $ext /var
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
+ JUPYTER_EXTENSION_FILE=`basename $ext`
+ curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
+ docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ else
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
+ fi
+ done
+ fi
+
+ # Install serverExtensions if provided by the user
+ if [ ! -z "$JUPYTER_SERVER_EXTENSIONS" ] ; then
+ for ext in ${JUPYTER_SERVER_EXTENSIONS}
+ do
+ log "Installing Jupyter server extension [$ext]..."
+ if [[ $ext == 'gs://'* ]]; then
+ $GSUTIL_CMD cp $ext /var
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ else
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
+ fi
+ done
+ fi
+
+ # Install combined extensions if provided by the user
+ if [ ! -z "$JUPYTER_COMBINED_EXTENSIONS" ] ; then
+ for ext in ${JUPYTER_COMBINED_EXTENSIONS}
+ do
+ log "Installing Jupyter combined extension [$ext]..."
+ log $ext
+ if [[ $ext == 'gs://'* ]]; then
+ $GSUTIL_CMD cp $ext /var
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
+ else
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
+ fi
+ done
+ fi
+
+ # Install lab extensions if provided by the user
+ # Note: lab extensions need to installed as jupyter user, not root
+ if [ ! -z "$JUPYTER_LAB_EXTENSIONS" ] ; then
+ for ext in ${JUPYTER_LAB_EXTENSIONS}
+ do
+ log "Installing JupyterLab extension [$ext]..."
+ pwd
+ if [[ $ext == 'gs://'* ]]; then
+ $GSUTIL_CMD cp -r $ext /var
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
+ JUPYTER_EXTENSION_FILE=`basename $ext`
+ curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
+ docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ else
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
+ fi
+ done
+ fi
+
+ # done extension setup
+ STEP_TIMINGS+=($(date +%s))
+
+ # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume
+ # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter
+ # kernel tries to connect to it.
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
+
+ # For older jupyter images, jupyter_delocalize.py is using 127.0.0.1 as welder's url, which won't work now that we're no longer using `network_mode: host` for GCE VMs
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/127.0.0.1/welder/g' $JUPYTER_EXTENSIONS/jupyter_delocalize.py"
+
+ log 'Wget the gitignore_global file, set gitignore in Git Config'
+
+ # Copy gitignore into jupyter container (ask AOU?)
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget -N https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global \
+ && git config --global core.excludesfile $USER_HOME/gitignore_global"
+
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "whoami"
+
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "ls -l $JUPYTER_EXTENSIONS"
+
+ # Starts the locking logic (used for AOU). google_sign_in.js is likely not used anymore
+ docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
+ && mkdir -p $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
+ && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
+ && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
+ && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
+ && mkdir -p $JUPYTER_HOME/nbconfig"
+
+ # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
+ # This is to make it so that older images will still work after we change notebooks location to home dir
+ docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
+
+ # If a user script was specified, copy it into the docker container and execute it.
+ if [ ! -z "$USER_SCRIPT_URI" ] ; then
+ log 'Starting user script...'
+ apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
+ fi
+
+ # done user script
+ STEP_TIMINGS+=($(date +%s))
+
+ # If a start user script was specified, copy it into the docker container for consumption during startups.
+ if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
+ apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
+ fi
+
+ # done start user script
+ STEP_TIMINGS+=($(date +%s))
+
+ # Move jupyter_localize_extension and jupyter_delocalize to site-packages so they can be found by jupyter
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_localize_extension.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
+
+ log 'Starting Jupyter Notebook...'
+ retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
+
+ # done start Jupyter
+ STEP_TIMINGS+=($(date +%s))
+fi
+
+# RStudio specific setup; only do if RStudio is installed
+if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ EXIT_CODE=0
+ retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$?
+ if [ $EXIT_CODE -ne 0 ]; then
+ echo "RStudio user package installation directory creation failed, creating /packages directory"
+ docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages"
+ fi
+
+ # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site
+ retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT
+CLUSTER_NAME=$CLUSTER_NAME
+RUNTIME_NAME=$RUNTIME_NAME
+OWNER_EMAIL=$OWNER_EMAIL
+SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC
+RSTUDIO_USER_HOME=$RSTUDIO_USER_HOME" >> /usr/local/lib/R/etc/Renviron.site'
+
+ # Add custom_env_vars.env to Renviron.site
+ CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env
+ if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then
+ retry 3 docker cp /var/custom_env_vars.env ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/etc/custom_env_vars.env
+ retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/etc/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site'
+ fi
+
+ # For older rstudio images, /etc/rstudio/rserver.conf is using 127.0.0.1 as www-address, which won't work now that we're no longer using `network_mode: host` for GCE VMs
+ docker exec ${RSTUDIO_SERVER_NAME} sed -i "s/127.0.0.1/0.0.0.0/g" /etc/rstudio/rserver.conf
+
+ # If a user script was specified, copy it into the docker container and execute it.
+ if [ ! -z "$USER_SCRIPT_URI" ] ; then
+ apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
+ fi
+
+ # If a start user script was specified, copy it into the docker container for consumption during startups.
+ if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
+ apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
+ fi
+
+ # default autosave to 10 seconds
+ docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'mkdir -p $RSTUDIO_USER_HOME/.config/rstudio \
+ && echo "{
+\"initial_working_directory\": \"~\",
+\"auto_save_on_blur\": true,
+\"auto_save_on_idle\": \"commit\",
+\"posix_terminal_shell\": \"bash\",
+\"auto_save_idle_ms\": 10000
+}" > $RSTUDIO_USER_HOME/.config/rstudio/rstudio-prefs-temp.json \
+ && mv $RSTUDIO_USER_HOME/.config/rstudio/rstudio-prefs-temp.json $RSTUDIO_USER_HOME/.config/rstudio/rstudio-prefs.json \
+ && chown -R rstudio:users $RSTUDIO_USER_HOME/.config'
+
+ # Start RStudio server
+ retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init
+fi
+
+# Resize persistent disk if needed.
+echo "Resizing persistent disk attached to runtime $GOOGLE_PROJECT / $CLUSTER_NAME if disk size changed..."
+resize2fs ${DISK_DEVICE_ID}
+
+
+# Remove any unneeded cached images to save disk space.
+# Do this asynchronously so it doesn't hold up cluster creation
+log 'Pruning docker images...'
+docker image prune -a -f &
+
+log 'All done!'
+
+ELAPSED_TIME=$(($(date +%s) - $START_TIME))  # END_TIME was never assigned anywhere in this script; compute the end timestamp inline
+log "gce-init.sh took $(display_time $ELAPSED_TIME)"
+log "Step timings: ${STEP_TIMINGS[@]}"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/gpu-docker-compose.yaml b/http/src/main/resources/base-init-resources/gpu-docker-compose.yaml
new file mode 100644
index 0000000000..7412d51743
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/gpu-docker-compose.yaml
@@ -0,0 +1,15 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ jupyter:
+ volumes:
+ # shared with welder
+ - "/mnt/disks/work:${NOTEBOOKS_DIR}"
+ - "/var/lib/nvidia/lib64:/usr/local/nvidia/lib64"
+ - "/var/lib/nvidia/bin:/usr/local/nvidia/bin"
+ devices:
+ - "/dev/nvidia-uvm:/dev/nvidia-uvm"
+ - "/dev/nvidiactl:/dev/nvidiactl"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/init-actions.sh b/http/src/main/resources/base-init-resources/init-actions.sh
new file mode 100644
index 0000000000..a411e8b49e
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/init-actions.sh
@@ -0,0 +1,538 @@
+#!/usr/bin/env bash
+
+set -e -x
+
+# This is the very first script as we started on Dataproc
+#
+# This init script instantiates the tool (e.g. Jupyter) docker images on the Dataproc cluster master node.
+# Adapted from https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/datalab/datalab.sh
+#
+
+#
+# Functions
+#
+
+# Retry a command up to a specific number of times until it exits successfully,
+# with exponential back off.
+#
+# $ retry 5 echo "Hello"
+# Hello
+#
+# $ retry 5 false
+# Retry 1/5 exited 1, retrying in 2 seconds...
+# Retry 2/5 exited 1, retrying in 4 seconds...
+# Retry 3/5 exited 1, retrying in 8 seconds...
+# Retry 4/5 exited 1, retrying in 16 seconds...
+# Retry 5/5 exited 1, no more retries left.
+function retry {
+  local retries=$1  # maximum number of attempts
+  shift             # remaining args are the command (and its arguments) to run
+
+  for ((i = 1; i <= $retries; i++)); do
+    # run with an 'or' so set -e doesn't abort the bash script on errors
+    exit=0
+    "$@" || exit=$?
+    if [ $exit -eq 0 ]; then
+      return 0
+    fi
+    wait=$((2 ** $i))  # exponential back off: 2, 4, 8, ... seconds
+    if [ $i -eq $retries ]; then
+      log "Retry $i/$retries exited $exit, no more retries left."
+      break
+    fi
+    log "Retry $i/$retries exited $exit, retrying in $wait seconds..."
+    sleep $wait
+  done
+  return 1  # all attempts failed
+}
+
+function log() {
+  echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@"  # prefix every message with an ISO-8601 timestamp
+}
+
+function betterAptGet() {
+  if ! { apt-get update 2>&1 || echo E: update failed; } | grep -q '^[WE]:'; then  # apt-get update can exit 0 despite repo failures; detect W:/E: lines in its output instead
+    return 0
+  else
+    return 1
+  fi
+}
+
+function apply_user_script() {
+  # Run the one-time user script inside the given container (Dataproc variant; keep in sync with gce-init.sh).
+  local CONTAINER_NAME=$1  # docker container to exec into
+  local TARGET_DIR=$2      # directory inside the container where the script is copied and executed
+  log "Running user script $USER_SCRIPT_URI in $CONTAINER_NAME container..."
+  USER_SCRIPT=$(basename "${USER_SCRIPT_URI}")
+  if [[ "$USER_SCRIPT_URI" == 'gs://'* ]]; then
+    gsutil cp "${USER_SCRIPT_URI}" /etc
+  else
+    curl -f "${USER_SCRIPT_URI}" -o /etc/"${USER_SCRIPT}"  # -f: fail on HTTP errors instead of saving an error page and executing it below
+  fi
+  docker cp /etc/"${USER_SCRIPT}" "${CONTAINER_NAME}":"${TARGET_DIR}"/"${USER_SCRIPT}"
+  retry 3 docker exec -u root "${CONTAINER_NAME}" chmod +x "${TARGET_DIR}"/"${USER_SCRIPT}"
+
+  # Execute the user script as privileged to allow for deeper customization of VM behavior, e.g. installing
+  # network egress throttling. As docker is not a security layer, it is assumed that a determined attacker
+  # can gain full access to the VM already, so using this flag is not a significant escalation.
+  EXIT_CODE=0
+  docker exec --privileged -u root -e PIP_USER=false "${CONTAINER_NAME}" "${TARGET_DIR}"/"${USER_SCRIPT}" &> us_output.txt || EXIT_CODE=$?
+
+  if [ $EXIT_CODE -ne 0 ]; then
+    log "User script failed with exit code $EXIT_CODE. Output is saved to $USER_SCRIPT_OUTPUT_URI."
+    retry 3 gsutil -h "x-goog-meta-passed":"false" cp us_output.txt "${USER_SCRIPT_OUTPUT_URI}"
+    exit $EXIT_CODE
+  else
+    retry 3 gsutil -h "x-goog-meta-passed":"true" cp us_output.txt "${USER_SCRIPT_OUTPUT_URI}"
+  fi
+}
+
+function apply_start_user_script() {
+  # Run the start user script on every startup (Dataproc variant; keep in sync with gce-init.sh).
+  local CONTAINER_NAME=$1  # docker container to exec into
+  local TARGET_DIR=$2      # directory inside the container where the script is copied and executed
+  log "Running start user script $START_USER_SCRIPT_URI in $CONTAINER_NAME container..."
+  START_USER_SCRIPT=$(basename "${START_USER_SCRIPT_URI}")
+  if [[ "$START_USER_SCRIPT_URI" == 'gs://'* ]]; then
+    gsutil cp "${START_USER_SCRIPT_URI}" /etc
+  else
+    curl -f "${START_USER_SCRIPT_URI}" -o /etc/"${START_USER_SCRIPT}"  # -f: fail on HTTP errors instead of saving an error page and executing it below
+  fi
+  docker cp /etc/"${START_USER_SCRIPT}" "${CONTAINER_NAME}":"${TARGET_DIR}"/"${START_USER_SCRIPT}"
+  retry 3 docker exec -u root "${CONTAINER_NAME}" chmod +x "${TARGET_DIR}"/"${START_USER_SCRIPT}"
+
+  # Keep in sync with startup.sh
+  EXIT_CODE=0
+  docker exec --privileged -u root -e PIP_USER=false "${CONTAINER_NAME}" "${TARGET_DIR}"/"${START_USER_SCRIPT}" &> start_output.txt || EXIT_CODE=$?
+  if [ $EXIT_CODE -ne 0 ]; then
+    log "User start script failed with exit code ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"  # log (not echo) for consistency with apply_user_script
+    retry 3 gsutil -h "x-goog-meta-passed":"false" cp start_output.txt "${START_USER_SCRIPT_OUTPUT_URI}"
+    exit $EXIT_CODE
+  else
+    retry 3 gsutil -h "x-goog-meta-passed":"true" cp start_output.txt "${START_USER_SCRIPT_OUTPUT_URI}"
+  fi
+}
+
+#
+# Main
+#
+
+#
+# Array for instrumentation
+# UPDATE THIS IF YOU ADD MORE STEPS:
+# currently the steps are:
+# START init,
+# .. after gcloud Ops Agent
+# .. after env setup
+# .. after copying files from google and into docker
+# .. after docker compose
+# .. after welder start
+# .. after hail and spark
+# .. after nbextension install
+# .. after server extension install
+# .. after combined extension install
+# .. after user script
+# .. after lab extension install
+# .. after jupyter notebook start
+# END
+STEP_TIMINGS=($(date +%s))  # seconds-since-epoch checkpoints; one value is appended after each step listed above
+
+# temp workaround for https://github.com/docker/compose/issues/5930
+export CLOUDSDK_PYTHON=python3
+
+# This identifies whether we are running on the master node (running the jupyter container). There does not seem to be any customization of the worker nodes
+ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role)  # compared against 'Master' below
+
+# Only initialize tool and proxy docker containers on the master
+if [[ "${ROLE}" == 'Master' ]]; then
+
+  ## Installs Google Cloud Ops Agent that is now required for Dataproc 2.2.X ###
+ # See https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/opsagent
+ # Installs the Google Cloud Ops Agent on each node in the cluster.
+ # It also provides an override to the built-in logging config to set empty
+ # receivers i.e. not collect any logs.
+ # If you need to collect syslogs, you can use the other script in this directory,
+ # opsagent.sh which uses the built-in configuration of Ops Agent.
+ # See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default.
+ #
+ curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh
+ bash add-google-cloud-ops-agent-repo.sh --also-install
+
+ JUPYTER_HOME=/etc/jupyter
+ JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
+ JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
+ KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
+
+ # Set variables
+ # Values like $(..) are populated by Leo when a cluster is created.
+ # See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
+ # Avoid exporting variables unless they are needed by external scripts or docker-compose files.
+ export CLOUD_SERVICE='DATAPROC'
+ # Needs to be in sync with terra-docker container
+ export USER_HOME=$(jupyterHomeDirectory)
+ export CLUSTER_NAME=$(clusterName)
+ export RUNTIME_NAME=$(clusterName)
+ export GOOGLE_PROJECT=$(googleProject)
+ export STAGING_BUCKET=$(stagingBucketName)
+ export OWNER_EMAIL=$(loginHint)
+ export PET_SA_EMAIL=$(petSaEmail)
+ export JUPYTER_SERVER_NAME=$(jupyterServerName)
+ export RSTUDIO_SERVER_NAME=$(rstudioServerName)
+ export PROXY_SERVER_NAME=$(proxyServerName)
+ export WELDER_SERVER_NAME=$(welderServerName)
+ export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName)
+ export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage)
+ export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
+ export PROXY_DOCKER_IMAGE=$(proxyDockerImage)
+ export WELDER_DOCKER_IMAGE=$(welderDockerImage)
+ export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage)
+ export WELDER_ENABLED=$(welderEnabled)
+ export NOTEBOOKS_DIR=$(notebooksDir)
+ export MEM_LIMIT=$(memLimit)
+ export SHM_SIZE=$(shmSize)
+ export WELDER_MEM_LIMIT=$(welderMemLimit)
+ export PROXY_SERVER_HOST_NAME=$(proxyServerHostName)
+ export CERT_DIRECTORY='/certs'
+ export WORK_DIRECTORY='/work'
+ export DOCKER_COMPOSE_FILES_DIRECTORY='/etc'
+ PROXY_SITE_CONF=$(proxySiteConf)
+ export HOST_PROXY_SITE_CONF_FILE_PATH=${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_SITE_CONF}`
+ if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ export SHOULD_BACKGROUND_SYNC="true"
+ else
+ export SHOULD_BACKGROUND_SYNC="false"
+ fi
+
+ SERVER_CRT=$(proxyServerCrt)
+ SERVER_KEY=$(proxyServerKey)
+ ROOT_CA=$(rootCaPem)
+ JUPYTER_DOCKER_COMPOSE=$(jupyterDockerCompose)
+ RSTUDIO_DOCKER_COMPOSE=$(rstudioDockerCompose)
+ PROXY_DOCKER_COMPOSE=$(proxyDockerCompose)
+ WELDER_DOCKER_COMPOSE=$(welderDockerCompose)
+ PROXY_SITE_CONF=$(proxySiteConf)
+ JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions)
+ JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions)
+ JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions)
+ JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions)
+ USER_SCRIPT_URI=$(userScriptUri)
+ USER_SCRIPT_OUTPUT_URI=$(userScriptOutputUri)
+ START_USER_SCRIPT_URI=$(startUserScriptUri)
+ # Include a timestamp suffix to differentiate different startup logs across restarts.
+ START_USER_SCRIPT_OUTPUT_URI="$(startUserScriptOutputUri)"
+ JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri)
+ CUSTOM_ENV_VARS_CONFIG_URI=$(customEnvVarsConfigUri)
+ RSTUDIO_SCRIPTS=/etc/rstudio/scripts
+ RSTUDIO_USER_HOME=/home/rstudio
+ INIT_BUCKET_NAME=$(initBucketName)
+
+ STEP_TIMINGS+=($(date +%s))
+
+ log 'Copying secrets from GCS...'
+
+ mkdir /work
+ mkdir /certs
+ chmod a+rwx /work
+
+ # Add the certificates from the bucket to the VM. They are used by the docker-compose file
+ gsutil cp ${SERVER_CRT} /certs
+ gsutil cp ${SERVER_KEY} /certs
+ gsutil cp ${ROOT_CA} /certs
+ gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY}
+
+
+ # GCP connector is used by dataproc to connect with the staging bucket to read the logs
+ touch /hadoop_gcs_connector_metadata_cache
+ touch auth_openidc.conf
+
+
+ # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts
+  cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml # heredoc append; '<>' was a typo that opened the file read/write and executed the YAML as shell
+ logging:
+ receivers:
+ welder:
+ type: files
+ include_paths: [/work/welder.log]
+ jupyter:
+ type: files
+ include_paths: [/work/jupyter.log]
+ daemon:
+ type: files
+ include_paths: [/var/log/daemon.log]
+ service:
+ pipelines:
+ default_pipeline:
+ receivers: [welder, jupyter, daemon]
+EOF
+ systemctl restart google-cloud-ops-agent
+
+ # Install env var config
+ if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then
+ log 'Copy custom env vars config...'
+ gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var
+ fi
+
+
+ # If any image is hosted in a GAR registry (detected by regex) then
+ # authorize docker to interact with gcr.io.
+ # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect.
+ # TODO (LM) test with non-broad account
+ if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then
+ log 'Authorizing GCR/GAR...'
+ gcloud auth configure-docker
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ log 'Starting up the Jupyter docker...'
+
+ # Run docker-compose for each specified compose file.
+ # Note the `docker-compose pull` is retried to avoid intermittent network errors, but
+ # `docker-compose up` is not retried.
+ COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`)
+
+ cat /etc/`basename ${PROXY_DOCKER_COMPOSE}`
+
+ if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then
+ COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`)
+ cat /etc/`basename ${WELDER_DOCKER_COMPOSE}`
+ fi
+
+ if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then
+ TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME}
+ COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`)
+ cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`
+ fi
+
+ if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then
+ TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME}
+ COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`)
+ cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`
+ fi
+
+ retry 5 docker-compose "${COMPOSE_FILES[@]}" config
+
+ # restart docker
+ systemctl restart docker
+
+ retry 5 docker-compose "${COMPOSE_FILES[@]}" pull
+ retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d
+
+ # Start up crypto detector, if enabled.
+ # This should be started after other containers.
+ # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network.
+ # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2
+ if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then
+ docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \
+ --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE}
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ # Jupyter-specific setup, only do if Jupyter is installed
+ if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then
+ log 'Installing Jupydocker kernelspecs...'
+
+ # Install notebook.json
+ if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then
+ log 'Copy Jupyter frontend notebook config...'
+ gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc
+ JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
+ docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ # Install NbExtensions. These are user-specified Jupyter extensions.
+ # For instance Terra UI is passing
+ # {
+ # "nbExtensions": {
+ # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js"
+ # },
+ # "labExtensions": {},
+ # "serverExtensions": {},
+ # "combinedExtensions": {}
+ # }
+ if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then
+ for ext in ${JUPYTER_NB_EXTENSIONS}
+ do
+        log "Installing Jupyter NB extension [$ext]..." # double quotes so the extension name is expanded in the log
+ if [[ $ext == 'gs://'* ]]; then
+ gsutil cp $ext /etc
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
+ JUPYTER_EXTENSION_FILE=`basename $ext`
+ curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
+ docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ else
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
+ fi
+ done
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ # Install serverExtensions if provided by the user
+ if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then
+ for ext in ${JUPYTER_SERVER_EXTENSIONS}
+ do
+        log "Installing Jupyter server extension [$ext]..." # double quotes so the extension name is expanded in the log
+ if [[ $ext == 'gs://'* ]]; then
+ gsutil cp $ext /etc
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ else
+          retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
+ fi
+ done
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ # Install combined extensions if provided by the user
+ if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then
+ for ext in ${JUPYTER_COMBINED_EXTENSIONS}
+ do
+        log "Installing Jupyter combined extension [$ext]..." # double quotes so the extension name is expanded in the log
+ log $ext
+ if [[ $ext == 'gs://'* ]]; then
+ gsutil cp $ext /etc
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
+ else
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
+ fi
+ done
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ # If a user script was specified, copy it into the docker container and execute it.
+ if [ ! -z "$USER_SCRIPT_URI" ] ; then
+ apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
+ fi
+
+ # done user script
+ STEP_TIMINGS+=($(date +%s))
+
+ # If a start user script was specified, copy it into the docker container for consumption during startups.
+ if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
+ apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
+ fi
+
+ # done start user script
+ STEP_TIMINGS+=($(date +%s))
+
+ # Install lab extensions if provided by the user
+ # Note: lab extensions need to installed as jupyter user, not root
+ if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then
+ for ext in ${JUPYTER_LAB_EXTENSIONS}
+ do
+        log "Installing JupyterLab extension [$ext]..." # double quotes so the extension name is expanded in the log
+ pwd
+ if [[ $ext == 'gs://'* ]]; then
+ gsutil cp -r $ext /etc
+ JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
+ docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
+ JUPYTER_EXTENSION_FILE=`basename $ext`
+ curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
+ docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ else
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
+ fi
+ done
+ fi
+
+ STEP_TIMINGS+=($(date +%s))
+
+ # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume
+ # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter
+ # kernel tries to connect to it.
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
+
+ # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
+ # A better to do this might be to take welder host as an argument to the script
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py"
+
+ # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
+ # This is to make it so that older images will still work after we change notebooks location to home dir
+ docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
+
+ # Copy gitignore into jupyter container
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global"
+
+ # Install nbstripout and set gitignore in Git Config
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \
+ && python -m nbstripout --install --global \
+ && git config --global core.excludesfile $USER_HOME/gitignore_global"
+
+ # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU)
+ docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
+ && mkdir -p $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
+ && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
+ && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
+ && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
+ && mkdir -p $JUPYTER_HOME/nbconfig"
+
+ log 'Starting Jupyter Notebook...'
+ retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
+
+ STEP_TIMINGS+=($(date +%s))
+ fi
+
+ # RStudio specific setup; only do if RStudio is installed
+ if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ EXIT_CODE=0
+ retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$?
+ if [ $EXIT_CODE -ne 0 ]; then
+ echo "RStudio user package installation directory creation failed, creating /packages directory"
+ docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages"
+ fi
+
+ # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site
+ retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT
+CLUSTER_NAME=$CLUSTER_NAME
+RUNTIME_NAME=$RUNTIME_NAME
+OWNER_EMAIL=$OWNER_EMAIL
+SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site'
+
+ # Add custom_env_vars.env to Renviron.site
+ CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env
+ if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then
+ retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env
+ retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site'
+ fi
+
+ # If a user script was specified, copy it into the docker container and execute it.
+ if [ ! -z "$USER_SCRIPT_URI" ] ; then
+ apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
+ fi
+
+ # If a start user script was specified, copy it into the docker container for consumption during startups.
+ if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
+ apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
+ fi
+
+ # Start RStudio server
+ retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init
+ fi
+
+ # Remove any unneeded cached images to save disk space.
+ # Do this asynchronously so it doesn't hold up cluster creation
+ log 'Pruning docker images...'
+ docker image prune -a -f &
+fi
+
+log 'All done!'
+log "Timings: ${STEP_TIMINGS[@]}"
diff --git a/http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml
new file mode 100644
index 0000000000..d94f5eb763
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml
@@ -0,0 +1,46 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ jupyter:
+ container_name: "${JUPYTER_SERVER_NAME}"
+ image: "${JUPYTER_DOCKER_IMAGE}"
+ # Override entrypoint with a placeholder to keep the container running indefinitely.
+ # The cluster init script will run some scripts as root and then start pyspark as
+ # jupyter-user via docker exec.
+ # -F will follow the log when the log is created.
+ entrypoint: "tail -F ${NOTEBOOKS_DIR}/jupyter.log"
+ ports:
+ - "8000:8000"
+ networks:
+ - app_network
+ volumes:
+ # shared with welder
+ - "/mnt/disks/work:${NOTEBOOKS_DIR}"
+ restart: always
+ environment:
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ CLUSTER_NAME: "${RUNTIME_NAME}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ OWNER_EMAIL: "${OWNER_EMAIL}"
+ PET_SA_EMAIL: "${PET_SA_EMAIL}"
+ # Value must be the string "true" to enable.
+ WELDER_ENABLED: "${WELDER_ENABLED}"
+ NOTEBOOKS_DIR: "${NOTEBOOKS_DIR}"
+ PIP_USER: "true"
+ R_LIBS: "${NOTEBOOKS_DIR}/packages"
+ # The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2
+ # We should remove the two lines once we no longer support older images. In the meantime, we need to be careful updating Jupyter base images.
+ PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
+ PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ env_file:
+ - /var/custom_env_vars.env
+ # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
+ mem_limit: ${MEM_LIMIT} # hard limit on memory consumption by the container
+ memswap_limit: ${MEM_LIMIT}
+ shm_size: ${SHM_SIZE}
+networks:
+ app_network:
+ external: true
diff --git a/http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml
new file mode 100644
index 0000000000..1a8d7f8e88
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml
@@ -0,0 +1,61 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+#
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ jupyter:
+ container_name: "${JUPYTER_SERVER_NAME}"
+ image: "${JUPYTER_DOCKER_IMAGE}"
+ # Override entrypoint with a placeholder to keep the container running indefinitely.
+ # The runtime init script will run some scripts as root and then start pyspark as
+ # jupyter-user via docker exec.
+ entrypoint: "tail -f /dev/null"
+ network_mode: host
+ volumes:
+ # shared with welder
+ - ${WORK_DIRECTORY}:${NOTEBOOKS_DIR}
+ - /usr/lib/bigtop-utils:/usr/lib/bigtop-utils
+ - /usr/lib/hadoop:/usr/lib/hadoop
+ - /usr/lib/hadoop-hdfs:/usr/lib/hadoop-hdfs
+ - /usr/lib/hadoop-mapreduce:/usr/lib/hadoop-mapreduce
+ - /usr/lib/hadoop-yarn:/usr/lib/hadoop-yarn
+ - /usr/lib/hive:/usr/lib/hive
+ - /usr/lib/pig:/usr/lib/pig
+ - /etc/hadoop:/etc/hadoop
+ - /usr/lib/spark:/usr/lib/spark
+ - /etc/spark:/etc/spark
+ - /etc/hive:/etc/hive
+ - /usr/bin/pyspark:/usr/bin/pyspark
+ - /usr/bin/hdfs:/usr/bin/hdfs
+ - /usr/bin/hadoop:/usr/bin/hadoop
+ - /usr/bin/spark-submit:/usr/bin/spark-submit
+ - /usr/bin/yarn:/usr/bin/yarn
+ - /usr/bin/pig:/usr/bin/pig
+ - /hadoop:/hadoop
+ - /hadoop_gcs_connector_metadata_cache:/hadoop_gcs_connector_metadata_cache
+ - /usr/local/share/google/dataproc:/usr/local/share/google/dataproc
+ restart: always
+ environment:
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ CLUSTER_NAME: "${RUNTIME_NAME}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ OWNER_EMAIL: "${OWNER_EMAIL}"
+ PET_SA_EMAIL: "${PET_SA_EMAIL}"
+ # Value must be the string "true" to enable.
+ WELDER_ENABLED: "${WELDER_ENABLED}"
+ NOTEBOOKS_DIR: "${NOTEBOOKS_DIR}"
+ MEM_LIMIT: "${MEM_LIMIT}"
+ # (1/6/2022) When it's a year from now, consider removing the next two lines.
+ # The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2.
+ # We should remove the two lines once we no longer support older images.
+ # When we update base image in terra-docker next time, we should verify the paths are still valid
+ PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
+ PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ env_file:
+ - /var/custom_env_vars.env
+ # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
+ mem_limit: ${MEM_LIMIT} # hard limit on memory consumption by the container
+ memswap_limit: ${MEM_LIMIT}
+ shm_size: ${SHM_SIZE}
diff --git a/http/src/main/resources/base-init-resources/notebook.json b/http/src/main/resources/base-init-resources/notebook.json
new file mode 100644
index 0000000000..d6e21fc722
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/notebook.json
@@ -0,0 +1,7 @@
+{
+ "googleProject": $(googleProject),
+ "clusterName": $(clusterName),
+ "loginHint": $(loginHint),
+ "googleClientId": $(googleClientId),
+ "welderEnabled": $(welderEnabled)
+}
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml
new file mode 100644
index 0000000000..1ab92fe839
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml
@@ -0,0 +1,21 @@
+version: '2.4'
+services:
+ proxy:
+ container_name: "${PROXY_SERVER_NAME}"
+ image: "${PROXY_DOCKER_IMAGE}"
+ ports:
+ - "443:443"
+ networks:
+ - app_network
+ volumes:
+ - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro
+ - ${CERT_DIRECTORY}/jupyter-server.key:/etc/ssl/private/server.key:ro
+ - ${CERT_DIRECTORY}/rootCA.pem:/etc/ssl/certs/ca-bundle.crt:ro
+ - ${HOST_PROXY_SITE_CONF_FILE_PATH}:/etc/apache2/sites-enabled/site.conf
+ restart: always
+ environment:
+ HTTPD_PORT: '80'
+ SSL_HTTPD_PORT: '443'
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ PROXY_SERVER_HOST_NAME: "${PROXY_SERVER_HOST_NAME}"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/proxy-docker-compose.yaml b/http/src/main/resources/base-init-resources/proxy-docker-compose.yaml
new file mode 100644
index 0000000000..932c6bfbfc
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/proxy-docker-compose.yaml
@@ -0,0 +1,19 @@
+version: '2.4'
+services:
+ proxy:
+ container_name: "${PROXY_SERVER_NAME}"
+ image: "mirror.gcr.io/${PROXY_DOCKER_IMAGE}"
+ network_mode: host
+ ipc: shareable
+ volumes:
+ - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro
+ - ${CERT_DIRECTORY}/jupyter-server.key:/etc/ssl/private/server.key:ro
+ - ${CERT_DIRECTORY}/rootCA.pem:/etc/ssl/certs/ca-bundle.crt:ro
+ - ${HOST_PROXY_SITE_CONF_FILE_PATH}:/etc/apache2/sites-enabled/site.conf
+ restart: always
+ environment:
+ HTTPD_PORT: '80'
+ SSL_HTTPD_PORT: '443'
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ PROXY_SERVER_HOST_NAME: "${PROXY_SERVER_HOST_NAME}"
diff --git a/http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml
new file mode 100644
index 0000000000..b1958cace3
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml
@@ -0,0 +1,35 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ rstudio:
+ container_name: "${RSTUDIO_SERVER_NAME}"
+ image: "${RSTUDIO_DOCKER_IMAGE}"
+ # Override the entrypoint from the Dockerfile so rserver starts with the below environment variables
+ entrypoint: "tail -f /dev/null"
+ restart: always
+ ports:
+ - "8001:8001"
+ networks:
+ - app_network
+ environment:
+ # needed to disable auth
+ USER: "rstudio"
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ CLUSTER_NAME: "${RUNTIME_NAME}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ OWNER_EMAIL: "${OWNER_EMAIL}"
+ RSTUDIO_USER_HOME: "${RSTUDIO_USER_HOME}"
+ volumes:
+ - ${WORK_DIRECTORY}:/home/rstudio
+ env_file:
+ - /var/custom_env_vars.env
+ # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
+ mem_limit: ${MEM_LIMIT} # hard limit in byte on memory consumption by the container
+ memswap_limit: ${MEM_LIMIT}
+ shm_size: ${SHM_SIZE}
+networks:
+ app_network:
+ external: true
diff --git a/http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml b/http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml
new file mode 100644
index 0000000000..9733f1247c
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml
@@ -0,0 +1,28 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ rstudio:
+ container_name: "${RSTUDIO_SERVER_NAME}"
+ image: "${RSTUDIO_DOCKER_IMAGE}"
+ # Override the entrypoint from the Dockerfile so rserver starts with the below environment variables
+ entrypoint: "tail -f /dev/null"
+ restart: always
+ network_mode: host
+ environment:
+ # needed to disable auth
+ USER: "rstudio"
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ CLUSTER_NAME: "${RUNTIME_NAME}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ OWNER_EMAIL: "${OWNER_EMAIL}"
+ volumes:
+ - ${WORK_DIRECTORY}:/home/rstudio
+ env_file:
+ - /var/custom_env_vars.env
+ # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
+ mem_limit: ${MEM_LIMIT} # hard limit on memory consumption by the container
+ memswap_limit: ${MEM_LIMIT}
+ shm_size: ${SHM_SIZE}
diff --git a/http/src/main/resources/base-init-resources/shutdown.sh b/http/src/main/resources/base-init-resources/shutdown.sh
new file mode 100644
index 0000000000..6be30339a6
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/shutdown.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+set -e -x
+
+##
+# This is a shutdown script designed to run on Leo-created Google Dataproc clusters and Google Compute Engines (GCE).
+##
+
+# The CLOUD_SERVICE is assumed based on the location of the certs directory
+if [ -f "/var/certs/jupyter-server.crt" ]
+then
+ export CLOUD_SERVICE='GCE'
+else
+ export CLOUD_SERVICE='DATAPROC'
+fi
+
+# Set variables
+# Values like $(..) are populated by Leo when a cluster is resumed.
+# See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
+# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
+export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
+export RSTUDIO_SERVER_NAME=$(rstudioServerName)
+export SHOULD_DELETE_JUPYTER_DIR=$(shouldDeleteJupyterDir)
+
+
+# Remove jupyter related files if user decides to delete the VM
+if [ -d '/mnt/disks/work/.jupyter' ] && [ "$SHOULD_DELETE_JUPYTER_DIR" = "true" ] ; then # fix: '$' was missing, so the literal string never equalled "true"
+ rm -rf /mnt/disks/work/.jupyter
+ rm -rf /mnt/disks/work/.local || true
+fi
+
+if [[ "${CLOUD_SERVICE}" == 'GCE' ]]; then
+ # COS images need to run docker-compose as a container by design
+ DOCKER_COMPOSE='docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var docker/compose:1.29.2'
+else
+ # Dataproc has docker-compose natively installed
+ DOCKER_COMPOSE='docker-compose'
+fi
+
+$DOCKER_COMPOSE down
diff --git a/http/src/main/resources/base-init-resources/startup.sh b/http/src/main/resources/base-init-resources/startup.sh
new file mode 100644
index 0000000000..8ba25adcad
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/startup.sh
@@ -0,0 +1,399 @@
+#!/usr/bin/env bash
+
+set -e -x
+
+##
+# This is a startup script designed to run on Leo-created Dataproc clusters and GCE VMs.
+#
+# It starts up Jupyter and Welder processes. It also optionally deploys welder on a
+# cluster if not already installed.
+##
+
+EXIT_CODE=0
+
+# Set variables
+# Values like $(..) are populated by Leo when a cluster is resumed.
+# See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
+# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
+# The CLOUD_SERVICE is assumed based on the location of the certs directory
+if [ -f "/var/certs/jupyter-server.crt" ]
+then
+ export CLOUD_SERVICE='GCE'
+ export WORK_DIRECTORY='/mnt/disks/work'
+ CERT_DIRECTORY='/var/certs'
+ GSUTIL_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gsutil'
+ GCLOUD_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gcloud'
+ DOCKER_COMPOSE='docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var docker/compose:1.29.2'
+ DOCKER_COMPOSE_FILES_DIRECTORY='/var/docker-compose-files'
+
+else
+ export CLOUD_SERVICE='DATAPROC'
+ export WORK_DIRECTORY='/work'
+ CERT_DIRECTORY='/certs'
+ GSUTIL_CMD='gsutil'
+ GCLOUD_CMD='gcloud'
+ DOCKER_COMPOSE='docker-compose'
+ DOCKER_COMPOSE_FILES_DIRECTORY='/etc'
+fi
+export USER_HOME=$(jupyterHomeDirectory)
+export RSTUDIO_USER_HOME=/home/rstudio
+export GOOGLE_PROJECT=$(googleProject)
+export CLUSTER_NAME=$(clusterName)
+export RUNTIME_NAME=$(clusterName)
+export OWNER_EMAIL=$(loginHint)
+export PET_SA_EMAIL=$(petSaEmail)
+export JUPYTER_SERVER_NAME=$(jupyterServerName)
+export RSTUDIO_SERVER_NAME=$(rstudioServerName)
+export WELDER_SERVER_NAME=$(welderServerName)
+export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName)
+export NOTEBOOKS_DIR=$(notebooksDir)
+export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage)
+export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
+JUPYTER_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/jupyter-docker*)
+COMPLETE_JUPYTER_DOCKER_COMPOSE="-f $JUPYTER_DOCKER_COMPOSE"
+RSTUDIO_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/rstudio-docker*)
+COMPLETE_RSTUDIO_DOCKER_COMPOSE="-f $RSTUDIO_DOCKER_COMPOSE"
+export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage)
+export WELDER_ENABLED=$(welderEnabled)
+export UPDATE_WELDER=$(updateWelder)
+export WELDER_DOCKER_IMAGE=$(welderDockerImage)
+export DISABLE_DELOCALIZATION=$(disableDelocalization)
+export STAGING_BUCKET=$(stagingBucketName)
+export START_USER_SCRIPT_URI=$(startUserScriptUri)
+export START_USER_SCRIPT_OUTPUT_URI=$(startUserScriptOutputUri)
+export WELDER_MEM_LIMIT=$(welderMemLimit)
+export MEM_LIMIT=$(memLimit)
+export SHM_SIZE=$(shmSize)
+export INIT_BUCKET_NAME=$(initBucketName)
+export USE_GCE_STARTUP_SCRIPT=$(useGceStartupScript)
+export PROXY_DOCKER_COMPOSE=$(proxyDockerCompose)
+JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri)
+GPU_ENABLED=$(gpuEnabled)
+if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ export SHOULD_BACKGROUND_SYNC="true"
+else
+ export SHOULD_BACKGROUND_SYNC="false"
+fi
+
+# Overwrite old cert on restart
+SERVER_CRT=$(proxyServerCrt)
+SERVER_KEY=$(proxyServerKey)
+ROOT_CA=$(rootCaPem)
+
+#
+# Functions
+# (copied from init-actions.sh and gce-init.sh, see documentation there)
+#
+function retry {
+ local retries=$1
+ shift
+
+ for ((i = 1; i <= $retries; i++)); do
+ # run with an 'or' so set -e doesn't abort the bash script on errors
+ exit=0
+ "$@" || exit=$?
+ if [ $exit -eq 0 ]; then
+ return 0
+ fi
+ wait=$((2 ** $i))
+ if [ $i -eq $retries ]; then
+ log "Retry $i/$retries exited $exit, no more retries left."
+ break
+ fi
+ log "Retry $i/$retries exited $exit, retrying in $wait seconds..."
+ sleep $wait
+ done
+ return 1
+}
+
+function log() {
+ echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@"
+}
+
+function failScriptIfError() {
+ if [ $EXIT_CODE -ne 0 ]; then
+ echo "Fail to docker-compose start container ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"
+ retry 3 ${GSUTIL_CMD} -h "x-goog-meta-passed":"false" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
+ exit $EXIT_CODE
+ else
+ retry 3 ${GSUTIL_CMD} -h "x-goog-meta-passed":"true" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
+ fi
+}
+
+function validateCert() {
+ certFileDirectory=$1
+ ## This helps when we need to rotate certs.
+ notAfter=`openssl x509 -enddate -noout -in ${certFileDirectory}/jupyter-server.crt` # output should be something like `notAfter=Jul 4 20:31:52 2026 GMT`
+
+ ## If cert is old, then pull latest certs. Update date if we need to rotate cert again
+ if [[ "$notAfter" != *"notAfter=Jul 4"* ]] ; then
+ ${GSUTIL_CMD} cp ${SERVER_CRT} ${certFileDirectory}
+ ${GSUTIL_CMD} cp ${SERVER_KEY} ${certFileDirectory}
+ ${GSUTIL_CMD} cp ${ROOT_CA} ${certFileDirectory}
+
+ IMAGES_TO_RESTART=(-f /var/docker-compose-files/proxy-docker-compose-gce.yaml)
+ DATAPROC_IMAGES_TO_RESTART=(-f /etc/proxy-docker-compose.yaml)
+ if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ]; then
+ IMAGES_TO_RESTART+=(-f /var/docker-compose-files/welder-docker-compose-gce.yaml)
+ DATAPROC_IMAGES_TO_RESTART+=(-f /etc/welder-docker-compose.yaml)
+ fi
+ if [[ ! -z "$RSTUDIO_DOCKER_IMAGE" ]] ; then
+ IMAGES_TO_RESTART+=(-f /var/docker-compose-files/rstudio-docker-compose-gce.yaml)
+ fi
+ if [[ ! -z "$JUPYTER_DOCKER_IMAGE" ]] ; then
+ IMAGES_TO_RESTART+=(-f /var/docker-compose-files/jupyter-docker-compose-gce.yaml)
+ DATAPROC_IMAGES_TO_RESTART+=(-f /etc/jupyter-docker-compose.yaml )
+ fi
+
+ if [ "${CLOUD_SERVICE}" == 'DATAPROC' ]
+ then
+ ${DOCKER_COMPOSE} "${DATAPROC_IMAGES_TO_RESTART[@]}" restart &> /var/start_output.txt || EXIT_CODE=$?
+ else
+ ${DOCKER_COMPOSE} --env-file=/var/variables.env "${IMAGES_TO_RESTART[@]}" restart &> /var/start_output.txt || EXIT_CODE=$?
+ fi
+
+    failScriptIfError # takes no arguments; it reads the global EXIT_CODE set above
+ retry 3 ${GSUTIL_CMD} -h "x-goog-meta-passed":"true" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
+ fi
+}
+
+#
+# Main
+## The PD should be the only `sd` disk that is not mounted yet
+AllsdDisks=($(lsblk --nodeps --noheadings --output NAME --paths | grep -i "sd"))
+FreesdDisks=()
+for Disk in "${AllsdDisks[@]}"; do
+ Mounts="$(lsblk -no MOUNTPOINT "${Disk}")"
+ if [ -z "$Mounts" ]; then
+ echo "Found our unmounted persistent disk!"
+ FreesdDisks="${Disk}"
+ else
+ echo "Not our persistent disk!"
+ fi
+done
+DISK_DEVICE_ID=${FreesdDisks}
+
+## Notebook server home directories
+JUPYTER_HOME=/etc/jupyter
+RSTUDIO_SCRIPTS=/etc/rstudio/scripts
+
+if [ "${GPU_ENABLED}" == "true" ] ; then
+ log 'Installing GPU driver...'
+ version="535.154.05"
+ isAvailable=$(cos-extensions list|grep $version)
+ if [[ -z "$isAvailable" ]]; then
+ # Install default version on the COS image
+ cos-extensions install gpu
+ else
+ cos-extensions install gpu -- --version $version
+ fi
+
+ mount --bind /var/lib/nvidia /var/lib/nvidia
+ mount -o remount,exec /var/lib/nvidia
+
+ GPU_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/gpu-docker*)
+ COMPLETE_JUPYTER_DOCKER_COMPOSE="-f $JUPYTER_DOCKER_COMPOSE -f $GPU_DOCKER_COMPOSE"
+ COMPLETE_RSTUDIO_DOCKER_COMPOSE="-f $RSTUDIO_DOCKER_COMPOSE -f $GPU_DOCKER_COMPOSE"
+fi
+
+
+if [ "$UPDATE_WELDER" == "true" ] ; then
+ echo "Upgrading welder..."
+ WELDER_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/welder*)
+ # Make sure when runtimes restarts, they'll get a new version of welder docker compose file
+ $GSUTIL_CMD cp gs://${INIT_BUCKET_NAME}/`basename ${WELDER_DOCKER_COMPOSE}` $WELDER_DOCKER_COMPOSE
+
+tee /var/welder-variables.env << END
+WORK_DIRECTORY=${WORK_DIRECTORY}
+GOOGLE_PROJECT=${GOOGLE_PROJECT}
+RUNTIME_NAME=${RUNTIME_NAME}
+OWNER_EMAIL=${OWNER_EMAIL}
+PET_SA_EMAIL=${PET_SA_EMAIL}
+WELDER_ENABLED=${WELDER_ENABLED}
+WELDER_SERVER_NAME=${WELDER_SERVER_NAME}
+WELDER_DOCKER_IMAGE=${WELDER_DOCKER_IMAGE}
+STAGING_BUCKET=${STAGING_BUCKET}
+WELDER_MEM_LIMIT=${WELDER_MEM_LIMIT}
+SHOULD_BACKGROUND_SYNC=${SHOULD_BACKGROUND_SYNC}
+END
+
+ ${DOCKER_COMPOSE} -f ${WELDER_DOCKER_COMPOSE} stop
+ ${DOCKER_COMPOSE} -f ${WELDER_DOCKER_COMPOSE} rm -f
+ ${DOCKER_COMPOSE} --env-file=/var/welder-variables.env -f ${WELDER_DOCKER_COMPOSE} up -d &> /var/start_output.txt || EXIT_CODE=$?
+fi
+
+if [[ "${CLOUD_SERVICE}" == 'GCE' ]]; then
+ # GCE
+ fsck.ext4 -tvy ${DISK_DEVICE_ID}
+ mkdir -p /mnt/disks/work
+ mount -t ext4 -O discard,defaults ${DISK_DEVICE_ID} ${WORK_DIRECTORY}
+ chmod a+rwx /mnt/disks/work
+
+ # (1/6/22) Restart Jupyter Container to reset `NOTEBOOKS_DIR` for existing runtimes. This code can probably be removed after a year
+ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
+ echo "Restarting Jupyter Container $GOOGLE_PROJECT / $CLUSTER_NAME..."
+ # The user might have updated the runtime, which would change some environment variables like MEM_LIMIT and SHM_SIZE
+
+tee /var/variables.env << END
+JUPYTER_SERVER_NAME=${JUPYTER_SERVER_NAME}
+JUPYTER_DOCKER_IMAGE=${JUPYTER_DOCKER_IMAGE}
+NOTEBOOKS_DIR=${NOTEBOOKS_DIR}
+GOOGLE_PROJECT=${GOOGLE_PROJECT}
+RUNTIME_NAME=${RUNTIME_NAME}
+OWNER_EMAIL=${OWNER_EMAIL}
+PET_SA_EMAIL=${PET_SA_EMAIL}
+WELDER_ENABLED=${WELDER_ENABLED}
+SHM_SIZE=${SHM_SIZE}
+END
+
+ # We do not want to recreate a new container, to make sure we preserve the changes that users made with the startup script
+ # We only want to restart the existing container with the latest environment variables
+ ${DOCKER_COMPOSE} --env-file=/var/variables.env ${COMPLETE_JUPYTER_DOCKER_COMPOSE} up -d --no-recreate
+
+ # the docker containers need to be restarted or the jupyter container
+ # will fail to start until the appropriate volume/device exists
+ docker restart $JUPYTER_SERVER_NAME
+ docker restart $WELDER_SERVER_NAME
+
+ # update memory size, the memory swap must be updated as well (cannot be < memory)
+ docker update --memory ${MEM_LIMIT} --memory-swap ${MEM_LIMIT} $JUPYTER_SERVER_NAME
+
+ log 'Copy Jupyter frontend notebook config...'
+ $GSUTIL_CMD cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /var
+ JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
+ docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
+ fi
+
+ if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ echo "Restarting Rstudio Container $GOOGLE_PROJECT / $CLUSTER_NAME..."
+ # The user might have updated the runtime, which would change some environment variables like MEM_LIMIT and SHM_SIZE
+
+tee /var/variables.env << END
+WORK_DIRECTORY=${WORK_DIRECTORY}
+RSTUDIO_SERVER_NAME=${RSTUDIO_SERVER_NAME}
+RSTUDIO_DOCKER_IMAGE=${RSTUDIO_DOCKER_IMAGE}
+RSTUDIO_USER_HOME=${RSTUDIO_USER_HOME}
+GOOGLE_PROJECT=${GOOGLE_PROJECT}
+RUNTIME_NAME=${RUNTIME_NAME}
+OWNER_EMAIL=${OWNER_EMAIL}
+PET_SA_EMAIL=${PET_SA_EMAIL}
+WELDER_ENABLED=${WELDER_ENABLED}
+SHM_SIZE=${SHM_SIZE}
+END
+
+ # We do not want to recreate a new container, to make sure we preserve the changes that users made with the startup script
+ # We only want to restart the existing container with the latest environment variables
+ ${DOCKER_COMPOSE} --env-file=/var/variables.env ${COMPLETE_RSTUDIO_DOCKER_COMPOSE} up -d --no-recreate
+
+ # update memory size, the memory swap must be updated as well (cannot be < memory)
+ docker update --memory ${MEM_LIMIT} --memory-swap ${MEM_LIMIT} $RSTUDIO_SERVER_NAME
+
+ # the docker containers need to be restarted or the R container
+ # will fail to start until the appropriate volume/device exists.
+ docker restart $RSTUDIO_SERVER_NAME
+ docker restart $WELDER_SERVER_NAME
+
+ fi
+else
+ # DATAPROC
+
+ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
+ echo "Restarting Jupyter Container $GOOGLE_PROJECT / $CLUSTER_NAME..."
+
+ # We do not want to recreate a new container, to make sure we preserve the changes that users made with the startup script
+ ${DOCKER_COMPOSE} ${COMPLETE_JUPYTER_DOCKER_COMPOSE} up -d --no-recreate
+
+ log 'Copy Jupyter frontend notebook config...'
+ $GSUTIL_CMD cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /var
+ JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
+ docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
+
+ # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
+    # A better way to do this might be to take the welder host as an argument to the script
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/extensions/jupyter_delocalize.py"
+ fi
+fi
+
+
+validateCert ${CERT_DIRECTORY}
+
+# If a start user script was specified, execute it now. It should already be in the docker container
+# via initialization in init-actions.sh (we explicitly do not want to recopy it from GCS on every cluster resume).
+if [ ! -z ${START_USER_SCRIPT_URI} ] ; then
+ START_USER_SCRIPT=`basename ${START_USER_SCRIPT_URI}`
+ log "Executing user start script [$START_USER_SCRIPT]..."
+
+ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
+ if [ "$USE_GCE_STARTUP_SCRIPT" == "true" ] ; then
+ docker cp /var/${START_USER_SCRIPT} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${START_USER_SCRIPT}
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} chmod +x ${JUPYTER_HOME}/${START_USER_SCRIPT}
+
+ docker exec --privileged -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
+ else
+ docker cp /etc/${START_USER_SCRIPT} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${START_USER_SCRIPT}
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} chmod +x ${JUPYTER_HOME}/${START_USER_SCRIPT}
+
+ docker exec --privileged -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
+ fi
+ fi
+
+ if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ docker cp /var/${START_USER_SCRIPT} ${RSTUDIO_SERVER_NAME}:${RSTUDIO_SCRIPTS}/${START_USER_SCRIPT}
+ retry 3 docker exec -u root ${RSTUDIO_SERVER_NAME} chmod +x ${RSTUDIO_SCRIPTS}/${START_USER_SCRIPT}
+
+ docker exec --privileged -u root ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
+ fi
+
+ failScriptIfError
+fi
+
+# By default GCE restarts containers on exit so we're not explicitly starting them below
+
+# Configuring Jupyter
+if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
+ echo "Starting Jupyter on cluster $GOOGLE_PROJECT / $CLUSTER_NAME..."
+ TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME}
+
+ # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume
+ # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter
+ # kernel tries to connect to it.
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
+
+ # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
+ # This is to make it so that older images will still work after we change notebooks location to home dir
+ docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
+
+ # Start Jupyter server
+ docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/run-jupyter.sh $NOTEBOOKS_DIR || /etc/jupyter/bin/jupyter notebook)"
+fi
+
+# Configuring RStudio, if enabled
+if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
+ echo "Starting RStudio on cluster $GOOGLE_PROJECT / $CLUSTER_NAME..."
+
+ TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME}
+
+ # Warm up R before starting the RStudio session (see above comment).
+ docker exec $RSTUDIO_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
+
+ # Start RStudio server
+ docker exec -d $RSTUDIO_SERVER_NAME /init
+fi
+
+# Start up crypto detector, if enabled.
+# This should be started after other containers.
+# Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network.
+# See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2
+if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then
+ docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \
+ --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE}
+fi
+
+# Resize persistent disk if needed.
+# If it's GCE, we resize the PD. Dataproc doesn't have PD
+if [[ "${CLOUD_SERVICE}" == 'GCE' ]]; then
+ echo "Resizing persistent disk attached to runtime $GOOGLE_PROJECT / $CLUSTER_NAME if disk size changed..."
+ resize2fs ${DISK_DEVICE_ID}
+fi
diff --git a/http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml
deleted file mode 100644
index 2e8fb9de5b..0000000000
--- a/http/src/main/resources/base-init-resources/test-crypto-detector-docker-compose.yaml
+++ /dev/null
@@ -1 +0,0 @@
-crypto
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js b/http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js
deleted file mode 100644
index 63e50e2724..0000000000
--- a/http/src/main/resources/base-init-resources/test-google_plugin_jupyter.js
+++ /dev/null
@@ -1 +0,0 @@
-alert("Hello World!");
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js b/http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js
deleted file mode 100644
index b1af1b69e8..0000000000
--- a/http/src/main/resources/base-init-resources/test-google_plugin_jupyterlab.js
+++ /dev/null
@@ -1 +0,0 @@
-alert("Hello Lab!");
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-google_sign_in.js b/http/src/main/resources/base-init-resources/test-google_sign_in.js
deleted file mode 100644
index 80d24390c4..0000000000
--- a/http/src/main/resources/base-init-resources/test-google_sign_in.js
+++ /dev/null
@@ -1,2 +0,0 @@
-$(userEmailLoginHint)
-$(defaultClientId)
diff --git a/http/src/main/resources/base-init-resources/test-init-actions.sh b/http/src/main/resources/base-init-resources/test-init-actions.sh
deleted file mode 100644
index c182cd90ea..0000000000
--- a/http/src/main/resources/base-init-resources/test-init-actions.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-
-$(clusterName)
-$(googleProject)
-$(jupyterDockerImage)
-$(rstudioDockerImage)
-$(proxyDockerImage)
-$(jupyterUserScriptUri)
-$(jupyterStartUserScriptUri)
-$(jupyterServiceAccountCredentials)
-$(jupyterServerExtensions)
-$(jupyterNbExtensions)
-$(jupyterCombinedExtensions)
-$(jupyterUserScriptOutputUri)
-$(jupyterNotebookConfigUri)
-$(jupyterNotebookFrontendConfigUri)
-$(customEnvVarsConfigUri)
-$(memLimit)
-$(shmSize)
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh b/http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh
deleted file mode 100644
index 283c8e8018..0000000000
--- a/http/src/main/resources/base-init-resources/test-install-jupyter-extension.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-echo "Hello World"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml
deleted file mode 100644
index 257cc5642c..0000000000
--- a/http/src/main/resources/base-init-resources/test-jupyter-docker-compose-gce.yaml
+++ /dev/null
@@ -1 +0,0 @@
-foo
diff --git a/http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml
deleted file mode 100644
index 1910281566..0000000000
--- a/http/src/main/resources/base-init-resources/test-jupyter-docker-compose.yaml
+++ /dev/null
@@ -1 +0,0 @@
-foo
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml
deleted file mode 100644
index 18cd353694..0000000000
--- a/http/src/main/resources/base-init-resources/test-proxy-docker-compose.yaml
+++ /dev/null
@@ -1 +0,0 @@
-proxy
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml
deleted file mode 100644
index ba7985a3fa..0000000000
--- a/http/src/main/resources/base-init-resources/test-rstudio-docker-compose.yaml
+++ /dev/null
@@ -1 +0,0 @@
-rstudio
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-site.conf b/http/src/main/resources/base-init-resources/test-site.conf
deleted file mode 100644
index 9f26b637f0..0000000000
--- a/http/src/main/resources/base-init-resources/test-site.conf
+++ /dev/null
@@ -1 +0,0 @@
-Foo
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml b/http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml
deleted file mode 100644
index d43a6afa23..0000000000
--- a/http/src/main/resources/base-init-resources/test-welder-docker-compose.yaml
+++ /dev/null
@@ -1 +0,0 @@
-welder
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml
new file mode 100644
index 0000000000..f57f564a2b
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml
@@ -0,0 +1,44 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ welder:
+ container_name: "${WELDER_SERVER_NAME}"
+ image: "${WELDER_DOCKER_IMAGE}"
+ ports:
+ - "8080:8080"
+ networks:
+ - app_network
+ entrypoint: "/opt/docker/bin/entrypoint.sh"
+ restart: always
+ environment:
+ # TODO what env vars are actually needed here?
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ STAGING_BUCKET: "${STAGING_BUCKET}"
+ CLUSTER_NAME: "${RUNTIME_NAME}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ OWNER_EMAIL: "${OWNER_EMAIL}"
+ SHOULD_BACKGROUND_SYNC: "${SHOULD_BACKGROUND_SYNC}"
+ CLOUD_PROVIDER: gcp
+ LOCKING_ENABLED: "true"
+ PORT: 8080
+ # The following envs aren't needed for GCP. But use dummy values just so welder will be able to read config properly
+ # TODO: remove as part of https://broadworkbench.atlassian.net/browse/AN-573
+ WSM_URL: "dummy"
+ WORKSPACE_ID: "dummy"
+ STORAGE_CONTAINER_RESOURCE_ID: "dummy"
+ STAGING_STORAGE_CONTAINER_RESOURCE_ID: "dummy"
+ AZURE_MANAGEMENT_URL: "dummy"
+ volumes:
+ # shared with jupyter
+ - ${WORK_DIRECTORY}:/work
+ # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
+ mem_limit: ${WELDER_MEM_LIMIT}
+ mem_reservation: ${WELDER_MEM_LIMIT}
+ # disable swap by setting it to the same value as mem_limit
+ memswap_limit: ${WELDER_MEM_LIMIT}
+networks:
+ app_network:
+ external: true
diff --git a/http/src/main/resources/base-init-resources/welder-docker-compose.yaml b/http/src/main/resources/base-init-resources/welder-docker-compose.yaml
new file mode 100644
index 0000000000..509f72131e
--- /dev/null
+++ b/http/src/main/resources/base-init-resources/welder-docker-compose.yaml
@@ -0,0 +1,38 @@
+# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
+# configuring memory options in container mode. See discussion in:
+# https://docs.docker.com/compose/compose-file/#resources
+# https://github.com/docker/compose/issues/4513
+version: '2.4'
+services:
+ welder:
+ container_name: "${WELDER_SERVER_NAME}"
+ image: "${WELDER_DOCKER_IMAGE}"
+ entrypoint: "/opt/docker/bin/entrypoint.sh"
+ network_mode: host
+ restart: always
+ environment:
+ # TODO what env vars are actually needed here?
+ GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
+ STAGING_BUCKET: "${STAGING_BUCKET}"
+ CLUSTER_NAME: "${RUNTIME_NAME}"
+ RUNTIME_NAME: "${RUNTIME_NAME}"
+ OWNER_EMAIL: "${OWNER_EMAIL}"
+ SHOULD_BACKGROUND_SYNC: "${SHOULD_BACKGROUND_SYNC}"
+ CLOUD_PROVIDER: gcp
+ LOCKING_ENABLED: "true"
+ PORT: 8080
+ # The following envs aren't needed for GCP. But use dummy values just so welder will be able to read config properly
+ # TODO: remove as part of https://broadworkbench.atlassian.net/browse/AN-573
+ WSM_URL: "dummy"
+ WORKSPACE_ID: "dummy"
+ STORAGE_CONTAINER_RESOURCE_ID: "dummy"
+ STAGING_STORAGE_CONTAINER_RESOURCE_ID: "dummy"
+ AZURE_MANAGEMENT_URL: "dummy"
+ volumes:
+ # shared with jupyter
+ - ${WORK_DIRECTORY}:/work
+ # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
+ mem_limit: ${WELDER_MEM_LIMIT}
+ mem_reservation: ${WELDER_MEM_LIMIT}
+ # disable swap by setting it to the same value as mem_limit
+ memswap_limit: ${WELDER_MEM_LIMIT}
From f1df527e3ab2aa794936e2721b578cedb5e9baf1 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 2 Dec 2025 16:39:20 -0500
Subject: [PATCH 09/22] remove changes
---
.../main/resources/init-resources/gce-init.sh | 69 +++++++++----------
.../resources/init-resources/init-actions.sh | 46 ++++++-------
.../jupyter-docker-compose-gce.yaml | 4 +-
.../jupyter-docker-compose.yaml | 4 +-
.../main/resources/init-resources/startup.sh | 8 +--
5 files changed, 62 insertions(+), 69 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index d567bf9e51..50196db598 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -40,9 +40,8 @@ START_USER_SCRIPT_OUTPUT_URI=$(startUserScriptOutputUri)
IS_GCE_FORMATTED=$(isGceFormatted)
# Needs to be in sync with terra-docker container
JUPYTER_HOME=/etc/jupyter
-JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
-JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
-USER_HOME=$(jupyterHomeDirectory)
+JUPYTER_SCRIPTS=$JUPYTER_HOME/scripts
+JUPYTER_USER_HOME=$(jupyterHomeDirectory)
RSTUDIO_SCRIPTS=/etc/rstudio/scripts
SERVER_CRT=$(proxyServerCrt)
SERVER_KEY=$(proxyServerKey)
@@ -411,11 +410,13 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# Install everything after having mounted the empty PD
# This should not be needed anymore if the jupyter home is a directory of the PD mount point
# See: https://github.com/DataBiosphere/leonardo/pull/4465/files
- if [ ! "$USER_HOME" = "/home/jupyter" ] ; then
+ if [ ! "$JUPYTER_USER_HOME" = "/home/jupyter" ] ; then
# TODO: Remove once we stop supporting non AI notebooks based images
- log 'Installing Jupyter kernelspecs'
+ log 'Installing Jupyter kernelspecs...(Remove once we stop supporting non AI notebooks based images)'
+ KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
+
# Install kernelspecs inside the Jupyter container
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel ${KERNELSPEC_HOME}
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel ${KERNELSPEC_HOME}
fi
# Install notebook.json which is used to populate Jupyter.notebook.config in JavaScript extensions.
@@ -446,14 +447,14 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext
fi
done
fi
@@ -467,9 +468,9 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext
fi
done
fi
@@ -484,9 +485,9 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext
fi
done
fi
@@ -502,14 +503,14 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
$GSUTIL_CMD cp -r $ext /var
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
+ retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext
fi
done
fi
@@ -523,26 +524,24 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
# For older jupyter images, jupyter_delocalize.py is using 127.0.0.1 as welder's url, which won't work now that we're no longer using `network_mode: host` for GCE VMs
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/127.0.0.1/welder/g' $JUPYTER_EXTENSIONS/jupyter_delocalize.py"
-
- log 'Wget the gitignore_global file, set gitignore in Git Config'
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/127.0.0.1/welder/g' /etc/jupyter/custom/jupyter_delocalize.py"
# Copy gitignore into jupyter container (ask AOU?)
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget -N https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global \
- && git config --global core.excludesfile $USER_HOME/gitignore_global"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget -N https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global"
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "whoami"
-
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "ls -l $JUPYTER_EXTENSIONS"
+ # Install nbstripout and set gitignore in Git Config (ask AOU?)
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \
+ && nbstripout --install --global \
+ && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global"
# Starts the locking logic (used for AOU). google_sign_in.js is likely not used anymore
- docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
- && mkdir -p $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
- && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
- && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
- && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
+ docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
+ && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
+ && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
+ && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
@@ -566,12 +565,8 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# done start user script
STEP_TIMINGS+=($(date +%s))
- # Move jupyter_localize_extension and jupyter_delocalize to site-packages so they can be found by jupyter
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_localize_extension.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
-
log 'Starting Jupyter Notebook...'
- retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
+ retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}"
# done start Jupyter
STEP_TIMINGS+=($(date +%s))
@@ -644,4 +639,4 @@ log 'All done!'
ELAPSED_TIME=$(($END_TIME - $START_TIME))
log "gce-init.sh took $(display_time $ELAPSED_TIME)"
-log "Step timings: ${STEP_TIMINGS[@]}"
\ No newline at end of file
+log "Step timings: ${STEP_TIMINGS[@]}"
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index a411e8b49e..a48d0f2d99 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -159,8 +159,7 @@ if [[ "${ROLE}" == 'Master' ]]; then
bash add-google-cloud-ops-agent-repo.sh --also-install
JUPYTER_HOME=/etc/jupyter
- JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
- JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
+ JUPYTER_SCRIPTS=${JUPYTER_HOME}/scripts
KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
# Set variables
@@ -169,7 +168,7 @@ if [[ "${ROLE}" == 'Master' ]]; then
# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
export CLOUD_SERVICE='DATAPROC'
# Needs to be in sync with terra-docker container
- export USER_HOME=$(jupyterHomeDirectory)
+ export JUPYTER_USER_HOME=$(jupyterHomeDirectory)
export CLUSTER_NAME=$(clusterName)
export RUNTIME_NAME=$(clusterName)
export GOOGLE_PROJECT=$(googleProject)
@@ -276,7 +275,6 @@ EOF
# If any image is hosted in a GAR registry (detected by regex) then
# authorize docker to interact with gcr.io.
# NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect.
- # TODO (LM) test with non-broad account
if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then
log 'Authorizing GCR/GAR...'
gcloud auth configure-docker
@@ -362,14 +360,14 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext
fi
done
fi
@@ -385,9 +383,9 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME}${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext
fi
done
fi
@@ -404,9 +402,9 @@ EOF
gsutil cp $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
+ retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext
fi
done
fi
@@ -440,14 +438,14 @@ EOF
gsutil cp -r $ext /etc
JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
JUPYTER_EXTENSION_FILE=`basename $ext`
curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
else
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
+ retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext
fi
done
fi
@@ -461,7 +459,7 @@ EOF
# jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
# A better to do this might be to take welder host as an argument to the script
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py"
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
# This is to make it so that older images will still work after we change notebooks location to home dir
@@ -473,20 +471,20 @@ EOF
# Install nbstripout and set gitignore in Git Config
docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \
&& python -m nbstripout --install --global \
- && git config --global core.excludesfile $USER_HOME/gitignore_global"
+ && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global"
# Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU)
- docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
- && mkdir -p $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
- && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
- && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
- && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
+ docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
+ && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
+ && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
+ && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
log 'Starting Jupyter Notebook...'
- retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
+ retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}"
STEP_TIMINGS+=($(date +%s))
fi
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
index d94f5eb763..375e4209ee 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
@@ -33,8 +33,8 @@ services:
R_LIBS: "${NOTEBOOKS_DIR}/packages"
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2
# We should remove the two lines once we no longer support older images. In the meantime, we need to be careful updating Jupyter base images.
- PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
+ PATH: "/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
- /var/custom_env_vars.env
# See https://docs.docker.com/engine/reference/run/#user-memory-constraints
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
index 1a8d7f8e88..34f538e706 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
@@ -51,8 +51,8 @@ services:
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2.
# We should remove the two lines once we no longer support older images.
# When we update base image in terra-docker next time, we should verify the paths are still valid
- PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
+ PATH: "/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
- /var/custom_env_vars.env
# See https://docs.docker.com/engine/reference/run/#user-memory-constraints
diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh
index 8ba25adcad..7e90819ac2 100644
--- a/http/src/main/resources/init-resources/startup.sh
+++ b/http/src/main/resources/init-resources/startup.sh
@@ -35,7 +35,7 @@ else
DOCKER_COMPOSE='docker-compose'
DOCKER_COMPOSE_FILES_DIRECTORY='/etc'
fi
-export USER_HOME=$(jupyterHomeDirectory)
+export JUPYTER_USER_HOME=$(jupyterHomeDirectory)
export RSTUDIO_USER_HOME=/home/rstudio
export GOOGLE_PROJECT=$(googleProject)
export CLUSTER_NAME=$(clusterName)
@@ -145,7 +145,7 @@ function validateCert() {
DATAPROC_IMAGES_TO_RESTART+=(-f /etc/jupyter-docker-compose.yaml )
fi
- if [ "${CLOUD_SERVICE}" == 'DATAPROC' ]
+ if [ "${CLOUD_SERVICE}" == 'DATAPROC' ]
then
${DOCKER_COMPOSE} "${DATAPROC_IMAGES_TO_RESTART[@]}" restart &> /var/start_output.txt || EXIT_CODE=$?
else
@@ -312,7 +312,7 @@ else
# jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
# A better to do this might be to take welder host as an argument to the script
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/extensions/jupyter_delocalize.py"
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py"
fi
fi
@@ -366,7 +366,7 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
# Start Jupyter server
- docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/run-jupyter.sh $NOTEBOOKS_DIR || /etc/jupyter/bin/jupyter notebook)"
+ docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/scripts/run-jupyter.sh $NOTEBOOKS_DIR || /opt/conda/bin/jupyter notebook)"
fi
# Configuring RStudio, if enabled
From de87f765d7b15623b833b2e2f147766160aca34a Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 2 Dec 2025 18:40:22 -0500
Subject: [PATCH 10/22] remove init resources changes
---
.../resources/base-init-resources/README.md | 23 -
.../base-init-resources/cloud-init.yml | 12 -
.../base-init-resources/cluster-site-gce.conf | 79 ---
.../base-init-resources/cluster-site.conf | 100 ---
.../resources/base-init-resources/gce-init.sh | 647 ------------------
.../gpu-docker-compose.yaml | 15 -
.../base-init-resources/init-actions.sh | 538 ---------------
.../jupyter-docker-compose-gce.yaml | 46 --
.../jupyter-docker-compose.yaml | 61 --
.../base-init-resources/notebook.json | 7 -
.../proxy-docker-compose-gce.yaml | 21 -
.../proxy-docker-compose.yaml | 19 -
.../rstudio-docker-compose-gce.yaml | 35 -
.../rstudio-docker-compose.yaml | 28 -
.../resources/base-init-resources/shutdown.sh | 40 --
.../resources/base-init-resources/startup.sh | 399 -----------
.../welder-docker-compose-gce.yaml | 44 --
.../welder-docker-compose.yaml | 38 -
.../config/ClusterResourcesConfig.scala | 3 +-
.../leonardo/util/GceInterpreter.scala | 7 +-
20 files changed, 2 insertions(+), 2160 deletions(-)
delete mode 100644 http/src/main/resources/base-init-resources/README.md
delete mode 100644 http/src/main/resources/base-init-resources/cloud-init.yml
delete mode 100755 http/src/main/resources/base-init-resources/cluster-site-gce.conf
delete mode 100755 http/src/main/resources/base-init-resources/cluster-site.conf
delete mode 100644 http/src/main/resources/base-init-resources/gce-init.sh
delete mode 100644 http/src/main/resources/base-init-resources/gpu-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/init-actions.sh
delete mode 100644 http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml
delete mode 100644 http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/notebook.json
delete mode 100644 http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml
delete mode 100644 http/src/main/resources/base-init-resources/proxy-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml
delete mode 100644 http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml
delete mode 100644 http/src/main/resources/base-init-resources/shutdown.sh
delete mode 100644 http/src/main/resources/base-init-resources/startup.sh
delete mode 100644 http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml
delete mode 100644 http/src/main/resources/base-init-resources/welder-docker-compose.yaml
diff --git a/http/src/main/resources/base-init-resources/README.md b/http/src/main/resources/base-init-resources/README.md
deleted file mode 100644
index cc49061356..0000000000
--- a/http/src/main/resources/base-init-resources/README.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Jupyter nbextension development guide
-
-## Running locally
-To run plugins off local js files:
-1. [Install jupyter](https://jupyter.org/install)
-2. Run this command with the arguments to ensure the extension updates when you make changes to the .js files: `jupyter nbextension install /[absolute path to leo repo]/leonardo/src/main/resources/jupyter/ --symlink`
-3. To test edit-mode or safe-mode extensions, you must update the file to use local urls. For edit-mode, you can find a section of 4 variables near the top labelled `URLS for local testing` and a section labelled `URLS for leo deployment` above it. You can comment out the `URLS for leo deployment` and uncomment `URLS for local testing` (TODO: find a better way to do this)
-4. Run this for each extension in the jupyter/ dir you want enabled: ```jupyter nbextension enable jupyter/[File name WITHOUT EXTENSION]``` I.E., `jupyter nbextension enable jupyter/edit-mode`
-5. Run `jupyter notebook`. It should open the jupyter server in the browser window. You can verify the appropriate extension loaded via openning the developer console abd going to the `Sources` tab. On the file explorer on the left, you should find a folder called `nbextensions` containing the loaded extensions, possibly in `nbextensions -> jupyter`. Here you can place breakpoints to test functionality.
-
-## Misc Info
-
-Look at the jupyter_notebook_config and ensure your local config emulates what the settings are found in this file (of interest are port number and cors/auth settings)
-
-At the time of writing, there are 3 nbextensions, edit-mode.js, safe-mode.js, and google_sign_in.js
-
-extension_entry.js controls which plugins are loaded into the jupyter server image
-
-POST storageLinks/:
-`curl -vX POST --header 'Content-Type: application/json' --header 'Accept: application/json' [welderUrl]/storageLinks -d '{"localBaseDirectory": "[local dir relative to dir in welder conf, ex 'edit']", "localSafeModeBaseDirectory": "[local dir relative to dir in welder conf, ex 'safe']", "cloudStorageDirectory": "gs://jc-sample-bucket", "pattern": "*" }'`
-
-POST localize/:
-`curl -vX POST --header 'Content-Type: application/json' --header 'Accept: application/json' localhost:8081/objects -d '{"action" : "localize", "entries": [{ "sourceUri": "gs://jc-sample-bucket/Untitled.ipynb", "localDestinationPath": "edit/Untitled.ipynb" }] }'`
diff --git a/http/src/main/resources/base-init-resources/cloud-init.yml b/http/src/main/resources/base-init-resources/cloud-init.yml
deleted file mode 100644
index 46973abfc5..0000000000
--- a/http/src/main/resources/base-init-resources/cloud-init.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-#cloud-config
-
-write_files:
- - path: /etc/systemd/system/google-shutdown-scripts.service.d/override.conf
- permissions: 0644
- owner: root
- content: |
- [Unit]
- After=docker.service
-
-runcmd:
- - systemctl daemon-reload
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/cluster-site-gce.conf b/http/src/main/resources/base-init-resources/cluster-site-gce.conf
deleted file mode 100755
index 9f4c4dbf8e..0000000000
--- a/http/src/main/resources/base-init-resources/cluster-site-gce.conf
+++ /dev/null
@@ -1,79 +0,0 @@
-
-
- SSLEngine on
- SSLProxyEngine on
- SSLCertificateFile "/etc/ssl/certs/server.crt"
- SSLCertificateKeyFile "/etc/ssl/private/server.key"
- SSLCACertificateFile "/etc/ssl/certs/ca-bundle.crt"
-
- SSLVerifyClient require
- SSLVerifyDepth 10
-
- ServerName ${PROXY_SERVER_HOST_NAME}
- UseCanonicalName on
- ProxyRequests off
-
- RewriteEngine on
-
- ################
- # RStudio
- ################
- RewriteCond %{HTTP:Upgrade} =websocket
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) ws://rstudio:8001/$1 [P,L]
-
- RewriteCond %{HTTP:Upgrade} !=websocket
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) http://rstudio:8001/$1 [P,L]
-
- # Include a ProxyPassReverse so redirects by RStudio go to the correct server name (e.g. https://notebooks.firecloud.org)
- # Need to include both http and https, as RStudio redirects to https in some cases.
- ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ http://rstudio:8001/
- ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ https://rstudio:8001/
-
- # Append SameSite=None to cookies set by RStudio. This is required by some browsers because we
- # render RStudio in an iframe. There does not appear to be a way within RStudio to do this, hence
- # doing it in the proxy.
- # [IA-4997] to support CHIPS by setting partitioned cookies
- # Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly;Partitioned "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
- Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
-
- ####################
- # Welder
- ####################
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/welder/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/welder/(.*) http://welder:8080/$1 [P,L]
-
- #####################################
- # Jupyter (legacy /notebooks path)
- #####################################
-
- RewriteCond %{HTTP:Upgrade} =websocket
- RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
- RewriteRule .* ws://jupyter:8000%{REQUEST_URI} [P,L]
-
- RewriteCond %{HTTP:Upgrade} !=websocket
- RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
- RewriteRule .* http://jupyter:8000%{REQUEST_URI} [P,L]
-
- # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
-
- ################################
- # Jupyter (newer /proxy path)
- ################################
-
- # This needs to be coordinated with a change in jupyter_notebooks_config.py
- # which is why we haven't yet enabled this.
-
- # RewriteCond %{HTTP:Upgrade} =websocket
- # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
- # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) ws://127.0.0.1:8000/$1 [P,L]
-
- # RewriteCond %{HTTP:Upgrade} !=websocket
- # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
- # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) http://127.0.0.1:8000/$1 [P,L]
-
- # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
-
-
diff --git a/http/src/main/resources/base-init-resources/cluster-site.conf b/http/src/main/resources/base-init-resources/cluster-site.conf
deleted file mode 100755
index 46245b75e8..0000000000
--- a/http/src/main/resources/base-init-resources/cluster-site.conf
+++ /dev/null
@@ -1,100 +0,0 @@
-
-
- SSLEngine on
- SSLProxyEngine on
- SSLCertificateFile "/etc/ssl/certs/server.crt"
- SSLCertificateKeyFile "/etc/ssl/private/server.key"
- SSLCACertificateFile "/etc/ssl/certs/ca-bundle.crt"
-
- SSLVerifyClient require
- SSLVerifyDepth 10
-
- ServerName ${PROXY_SERVER_HOST_NAME}
- UseCanonicalName on
- ProxyRequests off
-
- RewriteEngine on
-
- ################
- # Spark Web UIs
- ################
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/gateway/.* [NC]
- RewriteRule .* http://127.0.0.1:8443%{REQUEST_URI} [P,L]
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/yarn/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/yarn/(.*) http://127.0.0.1:8443/yarn/$1 [P,L]
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jobhistory/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/jobhistory/(.*) http://127.0.0.1:8443/jobhistory/$1 [P,L]
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/apphistory/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/apphistory/(.*) http://127.0.0.1:8443/apphistory/$1 [P,L]
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/sparkhistory/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/sparkhistory/(.*) http://127.0.0.1:8443/sparkhistory/$1 [P,L]
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/hdfs/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/hdfs/(.*) http://127.0.0.1:8443/hdfs/$1 [P,L]
-
- ################
- # RStudio
- ################
- RewriteCond %{HTTP:Upgrade} =websocket
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) ws://127.0.0.1:8001/$1 [P,L]
-
- RewriteCond %{HTTP:Upgrade} !=websocket
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/rstudio/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/rstudio/(.*) http://127.0.0.1:8001/$1 [P,L]
-
- # Include a ProxyPassReverse so redirects by RStudio go to the correct server name (e.g. https://notebooks.firecloud.org)
- # Need to include both http and https, as RStudio redirects to https in some cases.
- ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ http://127.0.0.1:8001/
- ProxyPassReverse /proxy/${GOOGLE_PROJECT}/${RUNTIME_NAME}/rstudio/ https://127.0.0.1:8001/
-
- # Append SameSite=None to cookies set by RStudio. This is required by some browsers because we
- # render RStudio in an iframe. There does not appear to be a way within RStudio to do this, hence
- # doing it in the proxy.
- # [IA-4997] to support CHIPS by setting partitioned cookies
- # Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly;Partitioned "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
- Header edit Set-Cookie ^(.*)$ $1;Secure;SameSite=None;HttpOnly "expr=%{REQUEST_URI} =~ m#/proxy/[^/]*/[^/]*/rstudio/.*#"
-
- ####################
- # Welder
- ####################
-
- RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/welder/.* [NC]
- RewriteRule /proxy/[^/]*/[^/]*/welder/(.*) http://127.0.0.1:8080/$1 [P,L]
-
- #####################################
- # Jupyter (legacy /notebooks path)
- #####################################
-
- RewriteCond %{HTTP:Upgrade} =websocket
- RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
- RewriteRule .* ws://127.0.0.1:8000%{REQUEST_URI} [P,L]
-
- RewriteCond %{HTTP:Upgrade} !=websocket
- RewriteCond %{REQUEST_URI} /notebooks/[^/]*/[^/]*/.* [NC]
- RewriteRule .* http://127.0.0.1:8000%{REQUEST_URI} [P,L]
-
- # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
-
- ################################
- # Jupyter (newer /proxy path)
- ################################
-
- # This needs to be coordinated with a change in jupyter_notebooks_config.py
- # which is why we haven't yet enabled this.
-
- # RewriteCond %{HTTP:Upgrade} =websocket
- # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
- # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) ws://127.0.0.1:8000/$1 [P,L]
-
- # RewriteCond %{HTTP:Upgrade} !=websocket
- # RewriteCond %{REQUEST_URI} /proxy/[^/]*/[^/]*/jupyter/.* [NC]
- # RewriteRule /proxy/[^/]*/[^/]*/jupyter/(.*) http://127.0.0.1:8000/$1 [P,L]
-
- # Note Jupyter doesn't need ProxyPassReverse because the redirect URL is configured in jupyter_notebook_config.py
-
-
diff --git a/http/src/main/resources/base-init-resources/gce-init.sh b/http/src/main/resources/base-init-resources/gce-init.sh
deleted file mode 100644
index d567bf9e51..0000000000
--- a/http/src/main/resources/base-init-resources/gce-init.sh
+++ /dev/null
@@ -1,647 +0,0 @@
-#!/usr/bin/env bash
-
-# Borrowed from init-action.sh as our GCE offering came after the dataproc cluster one.
-# This init script instantiates the tool (e.g. Jupyter) docker images on Google Compute Engine instances created by Leo.
-
-set -e -x
-
-# Set variables
-# Values like $(..) are populated by Leo when a cluster is created.
-# See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
-# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
-export CLOUD_SERVICE='GCE'
-export CLUSTER_NAME=$(clusterName)
-export RUNTIME_NAME=$(clusterName)
-export GOOGLE_PROJECT=$(googleProject)
-export STAGING_BUCKET=$(stagingBucketName)
-export OWNER_EMAIL=$(loginHint)
-export PET_SA_EMAIL=$(petSaEmail)
-export JUPYTER_SERVER_NAME=$(jupyterServerName)
-export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage)
-export WELDER_SERVER_NAME=$(welderServerName)
-export WELDER_DOCKER_IMAGE=$(welderDockerImage)
-export RSTUDIO_SERVER_NAME=$(rstudioServerName)
-export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
-export RSTUDIO_USER_HOME=/home/rstudio
-export PROXY_SERVER_NAME=$(proxyServerName)
-export PROXY_DOCKER_IMAGE=$(proxyDockerImage)
-export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName)
-export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage)
-export MEM_LIMIT=$(memLimit)
-export SHM_SIZE=$(shmSize)
-export WELDER_MEM_LIMIT=$(welderMemLimit)
-export PROXY_SERVER_HOST_NAME=$(proxyServerHostName)
-export WELDER_ENABLED=$(welderEnabled)
-export NOTEBOOKS_DIR=$(notebooksDir)
-
-START_USER_SCRIPT_URI=$(startUserScriptUri)
-# Include a timestamp suffix to differentiate different startup logs across restarts.
-START_USER_SCRIPT_OUTPUT_URI=$(startUserScriptOutputUri)
-IS_GCE_FORMATTED=$(isGceFormatted)
-# Needs to be in sync with terra-docker container
-JUPYTER_HOME=/etc/jupyter
-JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
-JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
-USER_HOME=$(jupyterHomeDirectory)
-RSTUDIO_SCRIPTS=/etc/rstudio/scripts
-SERVER_CRT=$(proxyServerCrt)
-SERVER_KEY=$(proxyServerKey)
-ROOT_CA=$(rootCaPem)
-JUPYTER_DOCKER_COMPOSE_GCE=$(jupyterDockerCompose)
-RSTUDIO_DOCKER_COMPOSE=$(rstudioDockerCompose)
-PROXY_DOCKER_COMPOSE=$(proxyDockerCompose)
-WELDER_DOCKER_COMPOSE=$(welderDockerCompose)
-GPU_DOCKER_COMPOSE=$(gpuDockerCompose)
-PROXY_SITE_CONF=$(proxySiteConf)
-JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions)
-JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions)
-JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions)
-JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions)
-USER_SCRIPT_URI=$(userScriptUri)
-USER_SCRIPT_OUTPUT_URI=$(userScriptOutputUri)
-JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri)
-CUSTOM_ENV_VARS_CONFIG_URI=$(customEnvVarsConfigUri)
-GPU_ENABLED=$(gpuEnabled)
-INIT_BUCKET_NAME=$(initBucketName)
-
-CERT_DIRECTORY='/var/certs'
-DOCKER_COMPOSE_FILES_DIRECTORY='/var/docker-compose-files'
-WORK_DIRECTORY='/mnt/disks/work'
-# Toolbox is specific to COS images and is needed to access functionalities like gcloud
-# See https://cloud.google.com/container-optimized-os/docs/how-to/toolbox
-GSUTIL_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gsutil'
-GCLOUD_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gcloud'
-
-# Welder configuration, Rstudio files are saved every X seconds in the background but Jupyter notebooks are not
-if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- export SHOULD_BACKGROUND_SYNC="true"
-else
- export SHOULD_BACKGROUND_SYNC="false"
-fi
-
-#####################################################################################################
-# Functions
-#####################################################################################################
-
-# Retry a command up to a specific number of times until it exits successfully,
-# with exponential back off. For example:
-#
-# $ retry 5 echo "Hello"
-# Hello
-#
-# $ retry 5 false
-# Retry 1/5 exited 1, retrying in 2 seconds...
-# Retry 2/5 exited 1, retrying in 4 seconds...
-# Retry 3/5 exited 1, retrying in 8 seconds...
-# Retry 4/5 exited 1, retrying in 16 seconds...
-# Retry 5/5 exited 1, no more retries left.
-function retry {
- local retries=$1
- shift
-
- for ((i = 1; i <= $retries; i++)); do
- # run with an 'or' so set -e doesn't abort the bash script on errors
- exit=0
- "$@" || exit=$?
- if [ $exit -eq 0 ]; then
- return 0
- fi
- wait=$((2 ** $i))
- if [ $i -eq $retries ]; then
- log "Retry $i/$retries exited $exit, no more retries left."
- break
- fi
- log "Retry $i/$retries exited $exit, retrying in $wait seconds..."
- sleep $wait
- done
- return 1
-}
-
-function log() {
- echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@"
-}
-
-display_time() {
- local T=$1
- local D=$((T/60/60/24))
- local H=$((T/60/60%24))
- local M=$((T/60%60))
- local S=$((T%60))
- (( $D > 0 )) && printf '%d days ' $D
- (( $H > 0 )) && printf '%d hours ' $H
- (( $M > 0 )) && printf '%d minutes ' $M
- (( $D > 0 || $H > 0 || $M > 0 )) && printf 'and '
- printf '%d seconds\n' $S
-}
-
-function apply_user_script() {
- # User script to be executed once at creation time, but will not persist when the runtime is paused / resumed
- local CONTAINER_NAME=$1
- local TARGET_DIR=$2
-
- log "Running user script $USER_SCRIPT_URI in $CONTAINER_NAME container..."
- USER_SCRIPT=`basename ${USER_SCRIPT_URI}`
- if [[ "$USER_SCRIPT_URI" == 'gs://'* ]]; then
- $GSUTIL_CMD cp ${USER_SCRIPT_URI} /var &> /var/user_script_copy_output.txt
- else
- curl "${USER_SCRIPT_URI}" -o /var/"${USER_SCRIPT}"
- fi
- docker cp /var/"${USER_SCRIPT}" ${CONTAINER_NAME}:${TARGET_DIR}/"${USER_SCRIPT}"
- # Note that we are running as root
- retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/"${USER_SCRIPT}"
-
- # Execute the user script as privileged to allow for deeper customization of VM behavior, e.g. installing
- # network egress throttling. As docker is not a security layer, it is assumed that a determined attacker
- # can gain full access to the VM already, so using this flag is not a significant escalation.
- EXIT_CODE=0
- docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/"${USER_SCRIPT}" &> /var/us_output.txt || EXIT_CODE=$?
-
- # Should dump error in staging bucket so we can display that back as part of the error message
- if [ $EXIT_CODE -ne 0 ]; then
- log "User script failed with exit code $EXIT_CODE. Output is saved to $USER_SCRIPT_OUTPUT_URI."
- retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"false" cp /var/us_output.txt ${USER_SCRIPT_OUTPUT_URI}
- exit $EXIT_CODE
- else
- retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"true" cp /var/us_output.txt ${USER_SCRIPT_OUTPUT_URI}
- fi
-}
-
-function apply_start_user_script() {
- # User script to be executed at each startup time so the changes will persist between pause/resume cycles.
- # Only used by the AOU Workbench, not Terra yet.
- # See https://broadworkbench.atlassian.net/browse/IA-5054
- local CONTAINER_NAME=$1
- local TARGET_DIR=$2
-
- log "Running start user script $START_USER_SCRIPT_URI in $CONTAINER_NAME container..."
- START_USER_SCRIPT=`basename ${START_USER_SCRIPT_URI}`
- if [[ "$START_USER_SCRIPT_URI" == 'gs://'* ]]; then
- $GSUTIL_CMD cp ${START_USER_SCRIPT_URI} /var
- else
- curl $START_USER_SCRIPT_URI -o /var/${START_USER_SCRIPT}
- fi
- docker cp /var/${START_USER_SCRIPT} ${CONTAINER_NAME}:${TARGET_DIR}/${START_USER_SCRIPT}
- retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/${START_USER_SCRIPT}
-
- # Keep in sync with startup.sh
- EXIT_CODE=0
- docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
- if [ $EXIT_CODE -ne 0 ]; then
- echo "User start script failed with exit code ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"
- retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"false" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- exit $EXIT_CODE
- else
- retry 3 $GSUTIL_CMD -h "x-goog-meta-passed":"true" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- fi
-}
-
-#####################################################################################################
-# Main starts here.
-#####################################################################################################
-
-log "Running GCE VM init script..."
-
-# Array for instrumentation
-# UPDATE THIS IF YOU ADD MORE STEPS:
-# currently the steps are:
-# START init,
-# .. after persistent disk setup
-# .. after copying files from the GCS init bucket
-# .. after starting google-fluentd
-# .. after docker compose
-# .. after welder start
-# .. after extension install
-# .. after user script
-# .. after start user script
-# .. after start Jupyter
-# END
-
-## Used for profiling
-START_TIME=$(date +%s)
-STEP_TIMINGS=($(date +%s))
-
-
-DOCKER_COMPOSE="docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var docker/compose:1.29.2"
-
-mkdir -p ${WORK_DIRECTORY}
-mkdir -p ${CERT_DIRECTORY}
-mkdir -p ${DOCKER_COMPOSE_FILES_DIRECTORY}
-
-log 'Formatting and mounting persistent disk...'
-
-# Format and mount persistent disk
-## The PD should be the only `sd` disk that is not mounted yet
-AllsdDisks=($(lsblk --nodeps --noheadings --output NAME --paths | grep -i "sd"))
-FreesdDisks=()
-for Disk in "${AllsdDisks[@]}"; do
- Mounts="$(lsblk -no MOUNTPOINT "${Disk}")"
- if [ -z "$Mounts" ]; then
- echo "Found our unmounted persistent disk!"
- FreesdDisks="${Disk}"
- else
- echo "Not our persistent disk!"
- fi
-done
-DISK_DEVICE_ID=${FreesdDisks}
-
-## Only format disk is it hasn't already been formatted
-if [ "$IS_GCE_FORMATTED" == "false" ] ; then
- # It's likely that the persistent disk was previously mounted on another VM and wasn't properly unmounted
- # either because the VM was terminated or there is no unmount in the shutdown sequence and occasionally
- # fs is getting marked as not clean.
- # Passing -F -F to mkfs.ext4 should force the tool to ignore the state of the partition.
- # Note that there should be two instances command-line switch (-F -F) to override this check
-
- mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard ${DISK_DEVICE_ID} -F -F
-fi
-
-mount -t ext4 -O discard,defaults ${DISK_DEVICE_ID} ${WORK_DIRECTORY}
-
-# done persistent disk setup
-STEP_TIMINGS+=($(date +%s))
-
-# Enable GPU drivers on top of the base Google DeepLearning default image
-if [ "${GPU_ENABLED}" == "true" ] ; then
- log 'Installing GPU driver...'
- version="535.154.05"
- isAvailable=$(cos-extensions list|grep $version)
- if [[ -z "$isAvailable" ]]; then
- # Install default version on the COS image
- cos-extensions install gpu
- else
- cos-extensions install gpu -- --version $version
- fi
- mount --bind /var/lib/nvidia /var/lib/nvidia
- mount -o remount,exec /var/lib/nvidia
-
- $GSUTIL_CMD cp ${GPU_DOCKER_COMPOSE} ${DOCKER_COMPOSE_FILES_DIRECTORY}
-fi
-
-log 'Copying secrets from GCS...'
-
-# Add the certificates from the bucket to the VM. They are used by the docker-compose file
-$GSUTIL_CMD cp ${SERVER_CRT} ${CERT_DIRECTORY}
-$GSUTIL_CMD cp ${SERVER_KEY} ${CERT_DIRECTORY}
-$GSUTIL_CMD cp ${ROOT_CA} ${CERT_DIRECTORY}
-$GSUTIL_CMD cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY}
-
-
-# Install env var config (e.g. AOU / Terra use it to inject workspace name)
-# e.g. {
- # "WORKSPACE_NAME": "CARJune24",
- # "WORKSPACE_NAMESPACE": "callisto-dev",
- # "WORKSPACE_BUCKET": "gs://fc-09516ff0-136e-4874-8484-1be0afa267a6",
- # "GOOGLE_PROJECT": "terra-dev-e67d9572",
- # "CUSTOM_IMAGE": "false",
- # "DRS_RESOLVER_ENDPOINT": "api/v4/drs/resolve",
- # "TERRA_DEPLOYMENT_ENV": "dev"
- #}
-if [ ! -z "$CUSTOM_ENV_VARS_CONFIG_URI" ] ; then
- log 'Copy custom env vars config...'
- $GSUTIL_CMD cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var
-fi
-
-# done GCS copy
-STEP_TIMINGS+=($(date +%s))
-
-log 'Starting up the Jupyter...'
-
-# Run docker-compose for each specified compose file.
-# Note the `docker-compose pull` is retried to avoid intermittent network errors, but
-# `docker-compose up` is not retried since if that fails, something is probably broken
-# and wouldn't be remedied by retrying
-COMPOSE_FILES=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_DOCKER_COMPOSE}`)
-cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_DOCKER_COMPOSE}`
-if [ ! -z "$WELDER_DOCKER_IMAGE" ] && [ "$WELDER_ENABLED" == "true" ] ; then
- COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${WELDER_DOCKER_COMPOSE}`)
- cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${WELDER_DOCKER_COMPOSE}`
-fi
-
-if [ "${GPU_ENABLED}" == "true" ] ; then
- COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`)
- if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- # Little bit of hack to switch the jupyter paths to the rstudio ones. Should have separate docker gpu compose of rJupyter and Rstudio instead
- sed -i 's/jupyter/rstudio/g' ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`
- sed -i 's#${NOTEBOOKS_DIR}#/home/rstudio#g' ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`
- fi
- cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${GPU_DOCKER_COMPOSE}`
-fi
-
-if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
- TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME}
- COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${JUPYTER_DOCKER_COMPOSE_GCE}`)
- cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${JUPYTER_DOCKER_COMPOSE_GCE}`
-fi
-
-if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME}
- COMPOSE_FILES+=(-f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${RSTUDIO_DOCKER_COMPOSE}`)
- cat ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${RSTUDIO_DOCKER_COMPOSE}`
-fi
-
-tee /var/variables.env << END
-CERT_DIRECTORY=${CERT_DIRECTORY}
-WORK_DIRECTORY=${WORK_DIRECTORY}
-PROXY_SERVER_NAME=${PROXY_SERVER_NAME}
-PROXY_DOCKER_IMAGE=${PROXY_DOCKER_IMAGE}
-GOOGLE_PROJECT=${GOOGLE_PROJECT}
-RUNTIME_NAME=${RUNTIME_NAME}
-PROXY_SERVER_HOST_NAME=${PROXY_SERVER_HOST_NAME}
-JUPYTER_SERVER_NAME=${JUPYTER_SERVER_NAME}
-JUPYTER_DOCKER_IMAGE=${JUPYTER_DOCKER_IMAGE}
-NOTEBOOKS_DIR=${NOTEBOOKS_DIR}
-OWNER_EMAIL=${OWNER_EMAIL}
-PET_SA_EMAIL=${PET_SA_EMAIL}
-WELDER_ENABLED=${WELDER_ENABLED}
-MEM_LIMIT=${MEM_LIMIT}
-SHM_SIZE=${SHM_SIZE}
-WELDER_SERVER_NAME=${WELDER_SERVER_NAME}
-WELDER_DOCKER_IMAGE=${WELDER_DOCKER_IMAGE}
-STAGING_BUCKET=${STAGING_BUCKET}
-WELDER_MEM_LIMIT=${WELDER_MEM_LIMIT}
-JUPYTER_SCRIPTS=${JUPYTER_SCRIPTS}
-HOST_PROXY_SITE_CONF_FILE_PATH=${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_SITE_CONF}`
-DOCKER_COMPOSE_FILES_DIRECTORY=${DOCKER_COMPOSE_FILES_DIRECTORY}
-RSTUDIO_SERVER_NAME=${RSTUDIO_SERVER_NAME}
-RSTUDIO_DOCKER_IMAGE=${RSTUDIO_DOCKER_IMAGE}
-SHOULD_BACKGROUND_SYNC=${SHOULD_BACKGROUND_SYNC}
-RSTUDIO_USER_HOME=${RSTUDIO_USER_HOME}
-END
-
-# Create a network that allows containers to talk to each other via exposed ports
-docker network create -d bridge app_network
-
-# Dumps the rendered yaml to the init script log.
-${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" config
-
-# Docker Pull
-log 'Pulling docker images...'
-if ! retry 5 ${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" pull &> /var/docker_pull_output.txt; then
- # if coming from a private repo on GCR, need to use credentials supplied in cryptopants/docker-compose-gcr
- # (see https://hub.docker.com/r/cryptopants/docker-compose-gcr)
- log 'Docker pull failed. Private image, trying with cryptopants/docker-compose-gcr...'
- DOCKER_COMPOSE="docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var -w=/var cryptopants/docker-compose-gcr"
- retry 5 ${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" pull &> /var/docker_pull_output.txt;
-fi
-
-# This needs to happen before we start up containers because the jupyter user needs to be the owner of the PD
-chmod a+rwx ${WORK_DIRECTORY}
-
-# Docker compose up, starting all of the containers
-${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" up -d
-
-# Start up crypto detector, if enabled.
-# This should be started after other containers.
-# Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network.
-# See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2
-if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then
- docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \
- --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE}
-fi
-
-# done welder start
-STEP_TIMINGS+=($(date +%s))
-
-# Jupyter-specific setup, only do if Jupyter is installed
-if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
- # user package installation directory
- mkdir -p ${WORK_DIRECTORY}/packages
- chmod a+rwx ${WORK_DIRECTORY}/packages
-
- # Install everything after having mounted the empty PD
- # This should not be needed anymore if the jupyter home is a directory of the PD mount point
- # See: https://github.com/DataBiosphere/leonardo/pull/4465/files
- if [ ! "$USER_HOME" = "/home/jupyter" ] ; then
- # TODO: Remove once we stop supporting non AI notebooks based images
- log 'Installing Jupyter kernelspecs'
- # Install kernelspecs inside the Jupyter container
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel ${KERNELSPEC_HOME}
- fi
-
- # Install notebook.json which is used to populate Jupyter.notebook.config in JavaScript extensions.
- # This is used in the edit-mode.js extension that Terra/AoU use.
- if [ ! -z "$JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI" ] ; then
- log 'Copy Jupyter frontend notebook config...'
- $GSUTIL_CMD cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /var
- JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
- docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
- fi
-
- # Install NbExtensions. These are user-specified Jupyter extensions.
- # For instance Terra UI is passing
- # {
- # "nbExtensions": {
- # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js"
- # },
- # "labExtensions": {},
- # "serverExtensions": {},
- # "combinedExtensions": {}
-# }
- if [ ! -z "$JUPYTER_NB_EXTENSIONS" ] ; then
- for ext in ${JUPYTER_NB_EXTENSIONS}
- do
- log "Installing Jupyter NB extension [$ext]..."
- if [[ $ext == 'gs://'* ]]; then
- $GSUTIL_CMD cp $ext /var
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
- JUPYTER_EXTENSION_FILE=`basename $ext`
- curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
- docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
- fi
- done
- fi
-
- # Install serverExtensions if provided by the user
- if [ ! -z "$JUPYTER_SERVER_EXTENSIONS" ] ; then
- for ext in ${JUPYTER_SERVER_EXTENSIONS}
- do
- log "Installing Jupyter server extension [$ext]..."
- if [[ $ext == 'gs://'* ]]; then
- $GSUTIL_CMD cp $ext /var
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
- fi
- done
- fi
-
- # Install combined extensions if provided by the user
- if [ ! -z "$JUPYTER_COMBINED_EXTENSIONS" ] ; then
- for ext in ${JUPYTER_COMBINED_EXTENSIONS}
- do
- log "Installing Jupyter combined extension [$ext]..."
- log $ext
- if [[ $ext == 'gs://'* ]]; then
- $GSUTIL_CMD cp $ext /var
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
- else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
- fi
- done
- fi
-
- # Install lab extensions if provided by the user
- # Note: lab extensions need to installed as jupyter user, not root
- if [ ! -z "$JUPYTER_LAB_EXTENSIONS" ] ; then
- for ext in ${JUPYTER_LAB_EXTENSIONS}
- do
- log "Installing JupyterLab extension [$ext]..."
- pwd
- if [[ $ext == 'gs://'* ]]; then
- $GSUTIL_CMD cp -r $ext /var
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /var/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
- JUPYTER_EXTENSION_FILE=`basename $ext`
- curl $ext -o /var/${JUPYTER_EXTENSION_FILE}
- docker cp /var/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- else
- retry 3 docker exec -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
- fi
- done
- fi
-
- # done extension setup
- STEP_TIMINGS+=($(date +%s))
-
- # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume
- # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter
- # kernel tries to connect to it.
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
-
- # For older jupyter images, jupyter_delocalize.py is using 127.0.0.1 as welder's url, which won't work now that we're no longer using `network_mode: host` for GCE VMs
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/127.0.0.1/welder/g' $JUPYTER_EXTENSIONS/jupyter_delocalize.py"
-
- log 'Wget the gitignore_global file, set gitignore in Git Config'
-
- # Copy gitignore into jupyter container (ask AOU?)
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget -N https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global \
- && git config --global core.excludesfile $USER_HOME/gitignore_global"
-
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "whoami"
-
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "ls -l $JUPYTER_EXTENSIONS"
-
- # Starts the locking logic (used for AOU). google_sign_in.js is likely not used anymore
- docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
- && mkdir -p $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
- && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
- && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
- && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
- && mkdir -p $JUPYTER_HOME/nbconfig"
-
- # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
- # This is to make it so that older images will still work after we change notebooks location to home dir
- docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
-
- # If a user script was specified, copy it into the docker container and execute it.
- if [ ! -z "$USER_SCRIPT_URI" ] ; then
- log 'Starting user script...'
- apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
- fi
-
- # done user script
- STEP_TIMINGS+=($(date +%s))
-
- # If a start user script was specified, copy it into the docker container for consumption during startups.
- if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
- apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
- fi
-
- # done start user script
- STEP_TIMINGS+=($(date +%s))
-
- # Move jupyter_localize_extension and jupyter_delocalize to site-packages so they can be found by jupyter
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mv ${JUPYTER_EXTENSIONS}/jupyter_localize_extension.py ${JUPYTER_HOME}/lib/python3.10/site-packages"
-
- log 'Starting Jupyter Notebook...'
- retry 3 docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
-
- # done start Jupyter
- STEP_TIMINGS+=($(date +%s))
-fi
-
-# RStudio specific setup; only do if RStudio is installed
-if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- EXIT_CODE=0
- retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$?
- if [ $EXIT_CODE -ne 0 ]; then
- echo "RStudio user package installation directory creation failed, creating /packages directory"
- docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages"
- fi
-
- # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site
- retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT
-CLUSTER_NAME=$CLUSTER_NAME
-RUNTIME_NAME=$RUNTIME_NAME
-OWNER_EMAIL=$OWNER_EMAIL
-SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC
-RSTUDIO_USER_HOME=$RSTUDIO_USER_HOME" >> /usr/local/lib/R/etc/Renviron.site'
-
- # Add custom_env_vars.env to Renviron.site
- CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env
- if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then
- retry 3 docker cp /var/custom_env_vars.env ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/etc/custom_env_vars.env
- retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/etc/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site'
- fi
-
- # For older rstudio images, /etc/rstudio/rserver.conf is using 127.0.0.1 as www-address, which won't work now that we're no longer using `network_mode: host` for GCE VMs
- docker exec ${RSTUDIO_SERVER_NAME} sed -i "s/127.0.0.1/0.0.0.0/g" /etc/rstudio/rserver.conf
-
- # If a user script was specified, copy it into the docker container and execute it.
- if [ ! -z "$USER_SCRIPT_URI" ] ; then
- apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
- fi
-
- # If a start user script was specified, copy it into the docker container for consumption during startups.
- if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
- apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
- fi
-
- # default autosave to 10 seconds
- docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'mkdir -p $RSTUDIO_USER_HOME/.config/rstudio \
- && echo "{
-\"initial_working_directory\": \"~\",
-\"auto_save_on_blur\": true,
-\"auto_save_on_idle\": \"commit\",
-\"posix_terminal_shell\": \"bash\",
-\"auto_save_idle_ms\": 10000
-}" > $RSTUDIO_USER_HOME/.config/rstudio/rstudio-prefs-temp.json \
- && mv $RSTUDIO_USER_HOME/.config/rstudio/rstudio-prefs-temp.json $RSTUDIO_USER_HOME/.config/rstudio/rstudio-prefs.json \
- && chown -R rstudio:users $RSTUDIO_USER_HOME/.config'
-
- # Start RStudio server
- retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init
-fi
-
-# Resize persistent disk if needed.
-echo "Resizing persistent disk attached to runtime $GOOGLE_PROJECT / $CLUSTER_NAME if disk size changed..."
-resize2fs ${DISK_DEVICE_ID}
-
-
-# Remove any unneeded cached images to save disk space.
-# Do this asynchronously so it doesn't hold up cluster creation
-log 'Pruning docker images...'
-docker image prune -a -f &
-
-log 'All done!'
-
-ELAPSED_TIME=$(($END_TIME - $START_TIME))
-log "gce-init.sh took $(display_time $ELAPSED_TIME)"
-log "Step timings: ${STEP_TIMINGS[@]}"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/gpu-docker-compose.yaml b/http/src/main/resources/base-init-resources/gpu-docker-compose.yaml
deleted file mode 100644
index 7412d51743..0000000000
--- a/http/src/main/resources/base-init-resources/gpu-docker-compose.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- jupyter:
- volumes:
- # shared with welder
- - "/mnt/disks/work:${NOTEBOOKS_DIR}"
- - "/var/lib/nvidia/lib64:/usr/local/nvidia/lib64"
- - "/var/lib/nvidia/bin:/usr/local/nvidia/bin"
- devices:
- - "/dev/nvidia-uvm:/dev/nvidia-uvm"
- - "/dev/nvidiactl:/dev/nvidiactl"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/init-actions.sh b/http/src/main/resources/base-init-resources/init-actions.sh
deleted file mode 100644
index a411e8b49e..0000000000
--- a/http/src/main/resources/base-init-resources/init-actions.sh
+++ /dev/null
@@ -1,538 +0,0 @@
-#!/usr/bin/env bash
-
-set -e -x
-
-# This is the very first script as we started on Dataproc
-#
-# This init script instantiates the tool (e.g. Jupyter) docker images on the Dataproc cluster master node.
-# Adapted from https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/datalab/datalab.sh
-#
-
-#
-# Functions
-#
-
-# Retry a command up to a specific number of times until it exits successfully,
-# with exponential back off.
-#
-# $ retry 5 echo "Hello"
-# Hello
-#
-# $ retry 5 false
-# Retry 1/5 exited 1, retrying in 2 seconds...
-# Retry 2/5 exited 1, retrying in 4 seconds...
-# Retry 3/5 exited 1, retrying in 8 seconds...
-# Retry 4/5 exited 1, retrying in 16 seconds...
-# Retry 5/5 exited 1, no more retries left.
-function retry {
- local retries=$1
- shift
-
- for ((i = 1; i <= $retries; i++)); do
- # run with an 'or' so set -e doesn't abort the bash script on errors
- exit=0
- "$@" || exit=$?
- if [ $exit -eq 0 ]; then
- return 0
- fi
- wait=$((2 ** $i))
- if [ $i -eq $retries ]; then
- log "Retry $i/$retries exited $exit, no more retries left."
- break
- fi
- log "Retry $i/$retries exited $exit, retrying in $wait seconds..."
- sleep $wait
- done
- return 1
-}
-
-function log() {
- echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@"
-}
-
-function betterAptGet() {
- if ! { apt-get update 2>&1 || echo E: update failed; } | grep -q '^[WE]:'; then
- return 0
- else
- return 1
- fi
-}
-
-function apply_user_script() {
- local CONTAINER_NAME=$1
- local TARGET_DIR=$2
-
- log "Running user script $USER_SCRIPT_URI in $CONTAINER_NAME container..."
- USER_SCRIPT=`basename ${USER_SCRIPT_URI}`
- if [[ "$USER_SCRIPT_URI" == 'gs://'* ]]; then
- gsutil cp ${USER_SCRIPT_URI} /etc
- else
- curl $USER_SCRIPT_URI -o /etc/${USER_SCRIPT}
- fi
- docker cp /etc/${USER_SCRIPT} ${CONTAINER_NAME}:${TARGET_DIR}/${USER_SCRIPT}
- retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/${USER_SCRIPT}
-
- # Execute the user script as privileged to allow for deeper customization of VM behavior, e.g. installing
- # network egress throttling. As docker is not a security layer, it is assumed that a determined attacker
- # can gain full access to the VM already, so using this flag is not a significant escalation.
- EXIT_CODE=0
- docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/${USER_SCRIPT} &> us_output.txt || EXIT_CODE=$?
-
- if [ $EXIT_CODE -ne 0 ]; then
- log "User script failed with exit code $EXIT_CODE. Output is saved to $USER_SCRIPT_OUTPUT_URI."
- retry 3 gsutil -h "x-goog-meta-passed":"false" cp us_output.txt ${USER_SCRIPT_OUTPUT_URI}
- exit $EXIT_CODE
- else
- retry 3 gsutil -h "x-goog-meta-passed":"true" cp us_output.txt ${USER_SCRIPT_OUTPUT_URI}
- fi
-}
-
-function apply_start_user_script() {
- local CONTAINER_NAME=$1
- local TARGET_DIR=$2
-
- log "Running start user script $START_USER_SCRIPT_URI in $CONTAINER_NAME container..."
- START_USER_SCRIPT=`basename ${START_USER_SCRIPT_URI}`
- if [[ "$START_USER_SCRIPT_URI" == 'gs://'* ]]; then
- gsutil cp ${START_USER_SCRIPT_URI} /etc
- else
- curl $START_USER_SCRIPT_URI -o /etc/${START_USER_SCRIPT}
- fi
- docker cp /etc/${START_USER_SCRIPT} ${CONTAINER_NAME}:${TARGET_DIR}/${START_USER_SCRIPT}
- retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/${START_USER_SCRIPT}
-
- # Keep in sync with startup.sh
- EXIT_CODE=0
- docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/${START_USER_SCRIPT} &> start_output.txt || EXIT_CODE=$?
- if [ $EXIT_CODE -ne 0 ]; then
- echo "User start script failed with exit code ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"
- retry 3 gsutil -h "x-goog-meta-passed":"false" cp start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- exit $EXIT_CODE
- else
- retry 3 gsutil -h "x-goog-meta-passed":"true" cp start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- fi
-}
-
-#
-# Main
-#
-
-#
-# Array for instrumentation
-# UPDATE THIS IF YOU ADD MORE STEPS:
-# currently the steps are:
-# START init,
-# .. after gcloud Ops Agent
-# .. after env setup
-# .. after copying files from google and into docker
-# .. after docker compose
-# .. after welder start
-# .. after hail and spark
-# .. after nbextension install
-# .. after server extension install
-# .. after combined extension install
-# .. after user script
-# .. after lab extension install
-# .. after jupyter notebook start
-# END
-STEP_TIMINGS=($(date +%s))
-
-# temp workaround for https://github.com/docker/compose/issues/5930
-export CLOUDSDK_PYTHON=python3
-
-# This identifies whether we are running on the master node (running the jupyter container). There does not seem to be any customization of the worker nodes
-ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role)
-
-# Only initialize tool and proxy docker containers on the master
-if [[ "${ROLE}" == 'Master' ]]; then
-
- ## Installs Google Cloud Ops Agent that is now required for Datapoc 2.2.X ###
- # See https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/opsagent
- # Installs the Google Cloud Ops Agent on each node in the cluster.
- # It also provides an override to the built-in logging config to set empty
- # receivers i.e. not collect any logs.
- # If you need to collect syslogs, you can use the other script in this directory,
- # opsagent.sh which uses the built-in configuration of Ops Agent.
- # See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default.
- #
- curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh
- bash add-google-cloud-ops-agent-repo.sh --also-install
-
- JUPYTER_HOME=/etc/jupyter
- JUPYTER_EXTENSIONS=$JUPYTER_HOME/extensions
- JUPYTER_SCRIPTS=$JUPYTER_EXTENSIONS/scripts
- KERNELSPEC_HOME=/usr/local/share/jupyter/kernels
-
- # Set variables
- # Values like $(..) are populated by Leo when a cluster is created.
- # See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
- # Avoid exporting variables unless they are needed by external scripts or docker-compose files.
- export CLOUD_SERVICE='DATAPROC'
- # Needs to be in sync with terra-docker container
- export USER_HOME=$(jupyterHomeDirectory)
- export CLUSTER_NAME=$(clusterName)
- export RUNTIME_NAME=$(clusterName)
- export GOOGLE_PROJECT=$(googleProject)
- export STAGING_BUCKET=$(stagingBucketName)
- export OWNER_EMAIL=$(loginHint)
- export PET_SA_EMAIL=$(petSaEmail)
- export JUPYTER_SERVER_NAME=$(jupyterServerName)
- export RSTUDIO_SERVER_NAME=$(rstudioServerName)
- export PROXY_SERVER_NAME=$(proxyServerName)
- export WELDER_SERVER_NAME=$(welderServerName)
- export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName)
- export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage)
- export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
- export PROXY_DOCKER_IMAGE=$(proxyDockerImage)
- export WELDER_DOCKER_IMAGE=$(welderDockerImage)
- export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage)
- export WELDER_ENABLED=$(welderEnabled)
- export NOTEBOOKS_DIR=$(notebooksDir)
- export MEM_LIMIT=$(memLimit)
- export SHM_SIZE=$(shmSize)
- export WELDER_MEM_LIMIT=$(welderMemLimit)
- export PROXY_SERVER_HOST_NAME=$(proxyServerHostName)
- export CERT_DIRECTORY='/certs'
- export WORK_DIRECTORY='/work'
- export DOCKER_COMPOSE_FILES_DIRECTORY='/etc'
- PROXY_SITE_CONF=$(proxySiteConf)
- export HOST_PROXY_SITE_CONF_FILE_PATH=${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_SITE_CONF}`
- if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- export SHOULD_BACKGROUND_SYNC="true"
- else
- export SHOULD_BACKGROUND_SYNC="false"
- fi
-
- SERVER_CRT=$(proxyServerCrt)
- SERVER_KEY=$(proxyServerKey)
- ROOT_CA=$(rootCaPem)
- JUPYTER_DOCKER_COMPOSE=$(jupyterDockerCompose)
- RSTUDIO_DOCKER_COMPOSE=$(rstudioDockerCompose)
- PROXY_DOCKER_COMPOSE=$(proxyDockerCompose)
- WELDER_DOCKER_COMPOSE=$(welderDockerCompose)
- PROXY_SITE_CONF=$(proxySiteConf)
- JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions)
- JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions)
- JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions)
- JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions)
- USER_SCRIPT_URI=$(userScriptUri)
- USER_SCRIPT_OUTPUT_URI=$(userScriptOutputUri)
- START_USER_SCRIPT_URI=$(startUserScriptUri)
- # Include a timestamp suffix to differentiate different startup logs across restarts.
- START_USER_SCRIPT_OUTPUT_URI="$(startUserScriptOutputUri)"
- JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri)
- CUSTOM_ENV_VARS_CONFIG_URI=$(customEnvVarsConfigUri)
- RSTUDIO_SCRIPTS=/etc/rstudio/scripts
- RSTUDIO_USER_HOME=/home/rstudio
- INIT_BUCKET_NAME=$(initBucketName)
-
- STEP_TIMINGS+=($(date +%s))
-
- log 'Copying secrets from GCS...'
-
- mkdir /work
- mkdir /certs
- chmod a+rwx /work
-
- # Add the certificates from the bucket to the VM. They are used by the docker-compose file
- gsutil cp ${SERVER_CRT} /certs
- gsutil cp ${SERVER_KEY} /certs
- gsutil cp ${ROOT_CA} /certs
- gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY}
-
-
- # GCP connector is used by dataproc to connect with the staging bucket to read the logs
- touch /hadoop_gcs_connector_metadata_cache
- touch auth_openidc.conf
-
-
- # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts
- cat <> /etc/google-cloud-ops-agent/config.yaml
- logging:
- receivers:
- welder:
- type: files
- include_paths: [/work/welder.log]
- jupyter:
- type: files
- include_paths: [/work/jupyter.log]
- daemon:
- type: files
- include_paths: [/var/log/daemon.log]
- service:
- pipelines:
- default_pipeline:
- receivers: [welder, jupyter, daemon]
-EOF
- systemctl restart google-cloud-ops-agent
-
- # Install env var config
- if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then
- log 'Copy custom env vars config...'
- gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var
- fi
-
-
- # If any image is hosted in a GAR registry (detected by regex) then
- # authorize docker to interact with gcr.io.
- # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect.
- # TODO (LM) test with non-broad account
- if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then
- log 'Authorizing GCR/GAR...'
- gcloud auth configure-docker
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- log 'Starting up the Jupyter docker...'
-
- # Run docker-compose for each specified compose file.
- # Note the `docker-compose pull` is retried to avoid intermittent network errors, but
- # `docker-compose up` is not retried.
- COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`)
-
- cat /etc/`basename ${PROXY_DOCKER_COMPOSE}`
-
- if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then
- COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`)
- cat /etc/`basename ${WELDER_DOCKER_COMPOSE}`
- fi
-
- if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then
- TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME}
- COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`)
- cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`
- fi
-
- if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then
- TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME}
- COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`)
- cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`
- fi
-
- retry 5 docker-compose "${COMPOSE_FILES[@]}" config
-
- # restart docker
- systemctl restart docker
-
- retry 5 docker-compose "${COMPOSE_FILES[@]}" pull
- retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d
-
- # Start up crypto detector, if enabled.
- # This should be started after other containers.
- # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network.
- # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2
- if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then
- docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \
- --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE}
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- # Jupyter-specific setup, only do if Jupyter is installed
- if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then
- log 'Installing Jupydocker kernelspecs...'
-
- # Install notebook.json
- if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then
- log 'Copy Jupyter frontend notebook config...'
- gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc
- JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
- docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- # Install NbExtensions. These are user-specified Jupyter extensions.
- # For instance Terra UI is passing
- # {
- # "nbExtensions": {
- # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js"
- # },
- # "labExtensions": {},
- # "serverExtensions": {},
- # "combinedExtensions": {}
- # }
- if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then
- for ext in ${JUPYTER_NB_EXTENSIONS}
- do
- log 'Installing Jupyter NB extension [$ext]...'
- if [[ $ext == 'gs://'* ]]; then
- gsutil cp $ext /etc
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
- JUPYTER_EXTENSION_FILE=`basename $ext`
- curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
- docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_notebook_extension.sh $ext
- fi
- done
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- # Install serverExtensions if provided by the user
- if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then
- for ext in ${JUPYTER_SERVER_EXTENSIONS}
- do
- log 'Installing Jupyter server extension [$ext]...'
- if [[ $ext == 'gs://'* ]]; then
- gsutil cp $ext /etc
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME}${JUPYTER_SCRIPTS}/jupyter_pip_install_server_extension.sh $ext
- fi
- done
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- # Install combined extensions if provided by the user
- if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then
- for ext in ${JUPYTER_COMBINED_EXTENSIONS}
- do
- log 'Installing Jupyter combined extension [$ext]...'
- log $ext
- if [[ $ext == 'gs://'* ]]; then
- gsutil cp $ext /etc
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE}
- else
- retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_pip_install_combined_extension.sh $ext
- fi
- done
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- # If a user script was specified, copy it into the docker container and execute it.
- if [ ! -z "$USER_SCRIPT_URI" ] ; then
- apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
- fi
-
- # done user script
- STEP_TIMINGS+=($(date +%s))
-
- # If a start user script was specified, copy it into the docker container for consumption during startups.
- if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
- apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME
- fi
-
- # done start user script
- STEP_TIMINGS+=($(date +%s))
-
- # Install lab extensions if provided by the user
- # Note: lab extensions need to installed as jupyter user, not root
- if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then
- for ext in ${JUPYTER_LAB_EXTENSIONS}
- do
- log 'Installing JupyterLab extension [$ext]...'
- pwd
- if [[ $ext == 'gs://'* ]]; then
- gsutil cp -r $ext /etc
- JUPYTER_EXTENSION_ARCHIVE=`basename $ext`
- docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE}
- elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then
- JUPYTER_EXTENSION_FILE=`basename $ext`
- curl $ext -o /etc/${JUPYTER_EXTENSION_FILE}
- docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE}
- else
- retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/jupyter_install_lab_extension.sh $ext
- fi
- done
- fi
-
- STEP_TIMINGS+=($(date +%s))
-
- # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume
- # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter
- # kernel tries to connect to it.
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
-
- # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
- # A better to do this might be to take welder host as an argument to the script
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' ${JUPYTER_EXTENSIONS}/jupyter_delocalize.py"
-
- # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
- # This is to make it so that older images will still work after we change notebooks location to home dir
- docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
-
- # Copy gitignore into jupyter container
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global"
-
- # Install nbstripout and set gitignore in Git Config
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \
- && python -m nbstripout --install --global \
- && git config --global core.excludesfile $USER_HOME/gitignore_global"
-
- # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU)
- docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_SCRIPTS/install_jupyter_contrib_nbextensions.sh \
- && mkdir -p $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/google_sign_in.js $USER_HOME/.jupyter/custom/ \
- && ls -la $JUPYTER_EXTENSIONS/extension_entry_jupyter.js \
- && cp $JUPYTER_EXTENSIONS/extension_entry_jupyter.js $USER_HOME/.jupyter/custom/custom.js \
- && cp $JUPYTER_EXTENSIONS/safe-mode.js $USER_HOME/.jupyter/custom/ \
- && cp $JUPYTER_EXTENSIONS/edit-mode.js $USER_HOME/.jupyter/custom/ \
- && mkdir -p $JUPYTER_HOME/nbconfig"
-
- log 'Starting Jupyter Notebook...'
- retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_HOME}/run-jupyter.sh ${NOTEBOOKS_DIR}"
-
- STEP_TIMINGS+=($(date +%s))
- fi
-
- # RStudio specific setup; only do if RStudio is installed
- if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- EXIT_CODE=0
- retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$?
- if [ $EXIT_CODE -ne 0 ]; then
- echo "RStudio user package installation directory creation failed, creating /packages directory"
- docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages"
- fi
-
- # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site
- retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT
-CLUSTER_NAME=$CLUSTER_NAME
-RUNTIME_NAME=$RUNTIME_NAME
-OWNER_EMAIL=$OWNER_EMAIL
-SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site'
-
- # Add custom_env_vars.env to Renviron.site
- CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env
- if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then
- retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env
- retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site'
- fi
-
- # If a user script was specified, copy it into the docker container and execute it.
- if [ ! -z "$USER_SCRIPT_URI" ] ; then
- apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
- fi
-
- # If a start user script was specified, copy it into the docker container for consumption during startups.
- if [ ! -z "$START_USER_SCRIPT_URI" ] ; then
- apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS
- fi
-
- # Start RStudio server
- retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init
- fi
-
- # Remove any unneeded cached images to save disk space.
- # Do this asynchronously so it doesn't hold up cluster creation
- log 'Pruning docker images...'
- docker image prune -a -f &
-fi
-
-log 'All done!'
-log "Timings: ${STEP_TIMINGS[@]}"
diff --git a/http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml
deleted file mode 100644
index d94f5eb763..0000000000
--- a/http/src/main/resources/base-init-resources/jupyter-docker-compose-gce.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- jupyter:
- container_name: "${JUPYTER_SERVER_NAME}"
- image: "${JUPYTER_DOCKER_IMAGE}"
- # Override entrypoint with a placeholder to keep the container running indefinitely.
- # The cluster init script will run some scripts as root and then start pyspark as
- # jupyter-user via docker exec.
- # -F will follow the log when the log is created.
- entrypoint: "tail -F ${NOTEBOOKS_DIR}/jupyter.log"
- ports:
- - "8000:8000"
- networks:
- - app_network
- volumes:
- # shared with welder
- - "/mnt/disks/work:${NOTEBOOKS_DIR}"
- restart: always
- environment:
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- CLUSTER_NAME: "${RUNTIME_NAME}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- OWNER_EMAIL: "${OWNER_EMAIL}"
- PET_SA_EMAIL: "${PET_SA_EMAIL}"
- # Value must be the string "true" to enable.
- WELDER_ENABLED: "${WELDER_ENABLED}"
- NOTEBOOKS_DIR: "${NOTEBOOKS_DIR}"
- PIP_USER: "true"
- R_LIBS: "${NOTEBOOKS_DIR}/packages"
- # The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2
- # We should remove the two lines once we no longer support older images. In the meantime, we need to be careful updating Jupyter base images.
- PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
- env_file:
- - /var/custom_env_vars.env
- # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
- mem_limit: ${MEM_LIMIT} # hard limit on memory consumption by the container
- memswap_limit: ${MEM_LIMIT}
- shm_size: ${SHM_SIZE}
-networks:
- app_network:
- external: true
diff --git a/http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml
deleted file mode 100644
index 1a8d7f8e88..0000000000
--- a/http/src/main/resources/base-init-resources/jupyter-docker-compose.yaml
+++ /dev/null
@@ -1,61 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- jupyter:
- container_name: "${JUPYTER_SERVER_NAME}"
- image: "${JUPYTER_DOCKER_IMAGE}"
- # Override entrypoint with a placeholder to keep the container running indefinitely.
- # The runtime init script will run some scripts as root and then start pyspark as
- # jupyter-user via docker exec.
- entrypoint: "tail -f /dev/null"
- network_mode: host
- volumes:
- # shared with welder
- - ${WORK_DIRECTORY}:${NOTEBOOKS_DIR}
- - /usr/lib/bigtop-utils:/usr/lib/bigtop-utils
- - /usr/lib/hadoop:/usr/lib/hadoop
- - /usr/lib/hadoop-hdfs:/usr/lib/hadoop-hdfs
- - /usr/lib/hadoop-mapreduce:/usr/lib/hadoop-mapreduce
- - /usr/lib/hadoop-yarn:/usr/lib/hadoop-yarn
- - /usr/lib/hive:/usr/lib/hive
- - /usr/lib/pig:/usr/lib/pig
- - /etc/hadoop:/etc/hadoop
- - /usr/lib/spark:/usr/lib/spark
- - /etc/spark:/etc/spark
- - /etc/hive:/etc/hive
- - /usr/bin/pyspark:/usr/bin/pyspark
- - /usr/bin/hdfs:/usr/bin/hdfs
- - /usr/bin/hadoop:/usr/bin/hadoop
- - /usr/bin/spark-submit:/usr/bin/spark-submit
- - /usr/bin/yarn:/usr/bin/yarn
- - /usr/bin/pig:/usr/bin/pig
- - /hadoop:/hadoop
- - /hadoop_gcs_connector_metadata_cache:/hadoop_gcs_connector_metadata_cache
- - /usr/local/share/google/dataproc:/usr/local/share/google/dataproc
- restart: always
- environment:
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- CLUSTER_NAME: "${RUNTIME_NAME}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- OWNER_EMAIL: "${OWNER_EMAIL}"
- PET_SA_EMAIL: "${PET_SA_EMAIL}"
- # Value must be the string "true" to enable.
- WELDER_ENABLED: "${WELDER_ENABLED}"
- NOTEBOOKS_DIR: "${NOTEBOOKS_DIR}"
- MEM_LIMIT: "${MEM_LIMIT}"
- # (1/6/2022) When it's a year from now, consider removing the next two lines.
- # The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2.
- # We should remove the two lines once we no longer support older images.
- # When we update base image in terra-docker next time, we should verify the paths are still valid
- PYTHONPATH: "/etc/jupyter/extensions:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
- env_file:
- - /var/custom_env_vars.env
- # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
- mem_limit: ${MEM_LIMIT} # hard limit on memory consumption by the container
- memswap_limit: ${MEM_LIMIT}
- shm_size: ${SHM_SIZE}
diff --git a/http/src/main/resources/base-init-resources/notebook.json b/http/src/main/resources/base-init-resources/notebook.json
deleted file mode 100644
index d6e21fc722..0000000000
--- a/http/src/main/resources/base-init-resources/notebook.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
- "googleProject": $(googleProject),
- "clusterName": $(clusterName),
- "loginHint": $(loginHint),
- "googleClientId": $(googleClientId),
- "welderEnabled": $(welderEnabled)
-}
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml
deleted file mode 100644
index 1ab92fe839..0000000000
--- a/http/src/main/resources/base-init-resources/proxy-docker-compose-gce.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-version: '2.4'
-services:
- proxy:
- container_name: "${PROXY_SERVER_NAME}"
- image: "${PROXY_DOCKER_IMAGE}"
- ports:
- - "443:443"
- networks:
- - app_network
- volumes:
- - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro
- - ${CERT_DIRECTORY}/jupyter-server.key:/etc/ssl/private/server.key:ro
- - ${CERT_DIRECTORY}/rootCA.pem:/etc/ssl/certs/ca-bundle.crt:ro
- - ${HOST_PROXY_SITE_CONF_FILE_PATH}:/etc/apache2/sites-enabled/site.conf
- restart: always
- environment:
- HTTPD_PORT: '80'
- SSL_HTTPD_PORT: '443'
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- PROXY_SERVER_HOST_NAME: "${PROXY_SERVER_HOST_NAME}"
\ No newline at end of file
diff --git a/http/src/main/resources/base-init-resources/proxy-docker-compose.yaml b/http/src/main/resources/base-init-resources/proxy-docker-compose.yaml
deleted file mode 100644
index 932c6bfbfc..0000000000
--- a/http/src/main/resources/base-init-resources/proxy-docker-compose.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-version: '2.4'
-services:
- proxy:
- container_name: "${PROXY_SERVER_NAME}"
- image: "mirror.gcr.io/${PROXY_DOCKER_IMAGE}"
- network_mode: host
- ipc: shareable
- volumes:
- - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro
- - ${CERT_DIRECTORY}/jupyter-server.key:/etc/ssl/private/server.key:ro
- - ${CERT_DIRECTORY}/rootCA.pem:/etc/ssl/certs/ca-bundle.crt:ro
- - ${HOST_PROXY_SITE_CONF_FILE_PATH}:/etc/apache2/sites-enabled/site.conf
- restart: always
- environment:
- HTTPD_PORT: '80'
- SSL_HTTPD_PORT: '443'
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- PROXY_SERVER_HOST_NAME: "${PROXY_SERVER_HOST_NAME}"
diff --git a/http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml
deleted file mode 100644
index b1958cace3..0000000000
--- a/http/src/main/resources/base-init-resources/rstudio-docker-compose-gce.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- rstudio:
- container_name: "${RSTUDIO_SERVER_NAME}"
- image: "${RSTUDIO_DOCKER_IMAGE}"
- # Override the entrypoint from the Dockerfile so rserver starts with the below environment variables
- entrypoint: "tail -f /dev/null"
- restart: always
- ports:
- - "8001:8001"
- networks:
- - app_network
- environment:
- # needed to disable auth
- USER: "rstudio"
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- CLUSTER_NAME: "${RUNTIME_NAME}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- OWNER_EMAIL: "${OWNER_EMAIL}"
- RSTUDIO_USER_HOME: "${RSTUDIO_USER_HOME}"
- volumes:
- - ${WORK_DIRECTORY}:/home/rstudio
- env_file:
- - /var/custom_env_vars.env
- # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
- mem_limit: ${MEM_LIMIT} # hard limit in byte on memory consumption by the container
- memswap_limit: ${MEM_LIMIT}
- shm_size: ${SHM_SIZE}
-networks:
- app_network:
- external: true
diff --git a/http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml b/http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml
deleted file mode 100644
index 9733f1247c..0000000000
--- a/http/src/main/resources/base-init-resources/rstudio-docker-compose.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- rstudio:
- container_name: "${RSTUDIO_SERVER_NAME}"
- image: "${RSTUDIO_DOCKER_IMAGE}"
- # Override the entrypoint from the Dockerfile so rserver starts with the below environment variables
- entrypoint: "tail -f /dev/null"
- restart: always
- network_mode: host
- environment:
- # needed to disable auth
- USER: "rstudio"
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- CLUSTER_NAME: "${RUNTIME_NAME}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- OWNER_EMAIL: "${OWNER_EMAIL}"
- volumes:
- - ${WORK_DIRECTORY}:/home/rstudio
- env_file:
- - /var/custom_env_vars.env
- # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
- mem_limit: ${MEM_LIMIT} # hard limit on memory consumption by the container
- memswap_limit: ${MEM_LIMIT}
- shm_size: ${SHM_SIZE}
diff --git a/http/src/main/resources/base-init-resources/shutdown.sh b/http/src/main/resources/base-init-resources/shutdown.sh
deleted file mode 100644
index 6be30339a6..0000000000
--- a/http/src/main/resources/base-init-resources/shutdown.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env bash
-
-set -e -x
-
-##
-# This is a shutdown script designed to run on Leo-created Google Dataproc clusters and Google Compute Engines (GCE).
-##
-
-# The CLOUD_SERVICE is assumed based on the location of the certs directory
-if [ -f "/var/certs/jupyter-server.crt" ]
-then
- export CLOUD_SERVICE='GCE'
-else
- export CLOUD_SERVICE='DATAPROC'
-fi
-
-# Set variables
-# Values like $(..) are populated by Leo when a cluster is resumed.
-# See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
-# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
-export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
-export RSTUDIO_SERVER_NAME=$(rstudioServerName)
-export SHOULD_DELETE_JUPYTER_DIR=$(shouldDeleteJupyterDir)
-
-
-# Remove jupyter related files if user decides to delete the VM
-if [ -d '/mnt/disks/work/.jupyter' ] && [ "SHOULD_DELETE_JUPYTER_DIR" = "true" ] ; then
- rm -rf /mnt/disks/work/.jupyter
- rm -rf /mnt/disks/work/.local || true
-fi
-
-if [[ "${CLOUD_SERVICE}" == 'GCE' ]]; then
- # COS images need to run docker-compose as a container by design
- DOCKER_COMPOSE='docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var docker/compose:1.29.2'
-else
- # Dataproc has docker-compose natively installed
- DOCKER_COMPOSE='docker-compose'
-fi
-
-$DOCKER_COMPOSE down
diff --git a/http/src/main/resources/base-init-resources/startup.sh b/http/src/main/resources/base-init-resources/startup.sh
deleted file mode 100644
index 8ba25adcad..0000000000
--- a/http/src/main/resources/base-init-resources/startup.sh
+++ /dev/null
@@ -1,399 +0,0 @@
-#!/usr/bin/env bash
-
-set -e -x
-
-##
-# This is a startup script designed to run on Leo-created Dataproc clusters and GCE VMs.
-#
-# It starts up Jupyter and Welder processes. It also optionally deploys welder on a
-# cluster if not already installed.
-##
-
-EXIT_CODE=0
-
-# Set variables
-# Values like $(..) are populated by Leo when a cluster is resumed.
-# See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192
-# Avoid exporting variables unless they are needed by external scripts or docker-compose files.
-# The CLOUD_SERVICE is assumed based on the location of the certs directory
-if [ -f "/var/certs/jupyter-server.crt" ]
-then
- export CLOUD_SERVICE='GCE'
- export WORK_DIRECTORY='/mnt/disks/work'
- CERT_DIRECTORY='/var/certs'
- GSUTIL_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gsutil'
- GCLOUD_CMD='docker run --rm -v /var:/var us.gcr.io/cos-cloud/toolbox:v20230714 gcloud'
- DOCKER_COMPOSE='docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /var:/var docker/compose:1.29.2'
- DOCKER_COMPOSE_FILES_DIRECTORY='/var/docker-compose-files'
-
-else
- export CLOUD_SERVICE='DATAPROC'
- export WORK_DIRECTORY='/work'
- CERT_DIRECTORY='/certs'
- GSUTIL_CMD='gsutil'
- GCLOUD_CMD='gcloud'
- DOCKER_COMPOSE='docker-compose'
- DOCKER_COMPOSE_FILES_DIRECTORY='/etc'
-fi
-export USER_HOME=$(jupyterHomeDirectory)
-export RSTUDIO_USER_HOME=/home/rstudio
-export GOOGLE_PROJECT=$(googleProject)
-export CLUSTER_NAME=$(clusterName)
-export RUNTIME_NAME=$(clusterName)
-export OWNER_EMAIL=$(loginHint)
-export PET_SA_EMAIL=$(petSaEmail)
-export JUPYTER_SERVER_NAME=$(jupyterServerName)
-export RSTUDIO_SERVER_NAME=$(rstudioServerName)
-export WELDER_SERVER_NAME=$(welderServerName)
-export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName)
-export NOTEBOOKS_DIR=$(notebooksDir)
-export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage)
-export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage)
-JUPYTER_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/jupyter-docker*)
-COMPLETE_JUPYTER_DOCKER_COMPOSE="-f $JUPYTER_DOCKER_COMPOSE"
-RSTUDIO_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/rstudio-docker*)
-COMPLETE_RSTUDIO_DOCKER_COMPOSE="-f $RSTUDIO_DOCKER_COMPOSE"
-export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage)
-export WELDER_ENABLED=$(welderEnabled)
-export UPDATE_WELDER=$(updateWelder)
-export WELDER_DOCKER_IMAGE=$(welderDockerImage)
-export DISABLE_DELOCALIZATION=$(disableDelocalization)
-export STAGING_BUCKET=$(stagingBucketName)
-export START_USER_SCRIPT_URI=$(startUserScriptUri)
-export START_USER_SCRIPT_OUTPUT_URI=$(startUserScriptOutputUri)
-export WELDER_MEM_LIMIT=$(welderMemLimit)
-export MEM_LIMIT=$(memLimit)
-export SHM_SIZE=$(shmSize)
-export INIT_BUCKET_NAME=$(initBucketName)
-export USE_GCE_STARTUP_SCRIPT=$(useGceStartupScript)
-export PROXY_DOCKER_COMPOSE=$(proxyDockerCompose)
-JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri)
-GPU_ENABLED=$(gpuEnabled)
-if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- export SHOULD_BACKGROUND_SYNC="true"
-else
- export SHOULD_BACKGROUND_SYNC="false"
-fi
-
-# Overwrite old cert on restart
-SERVER_CRT=$(proxyServerCrt)
-SERVER_KEY=$(proxyServerKey)
-ROOT_CA=$(rootCaPem)
-
-#
-# Functions
-# (copied from init-actions.sh and gce-init.sh, see documentation there)
-#
-function retry {
- local retries=$1
- shift
-
- for ((i = 1; i <= $retries; i++)); do
- # run with an 'or' so set -e doesn't abort the bash script on errors
- exit=0
- "$@" || exit=$?
- if [ $exit -eq 0 ]; then
- return 0
- fi
- wait=$((2 ** $i))
- if [ $i -eq $retries ]; then
- log "Retry $i/$retries exited $exit, no more retries left."
- break
- fi
- log "Retry $i/$retries exited $exit, retrying in $wait seconds..."
- sleep $wait
- done
- return 1
-}
-
-function log() {
- echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@"
-}
-
-function failScriptIfError() {
- if [ $EXIT_CODE -ne 0 ]; then
- echo "Fail to docker-compose start container ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"
- retry 3 ${GSUTIL_CMD} -h "x-goog-meta-passed":"false" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- exit $EXIT_CODE
- else
- retry 3 ${GSUTIL_CMD} -h "x-goog-meta-passed":"true" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- fi
-}
-
-function validateCert() {
- certFileDirectory=$1
- ## This helps when we need to rotate certs.
- notAfter=`openssl x509 -enddate -noout -in ${certFileDirectory}/jupyter-server.crt` # output should be something like `notAfter=Jul 4 20:31:52 2026 GMT`
-
- ## If cert is old, then pull latest certs. Update date if we need to rotate cert again
- if [[ "$notAfter" != *"notAfter=Jul 4"* ]] ; then
- ${GSUTIL_CMD} cp ${SERVER_CRT} ${certFileDirectory}
- ${GSUTIL_CMD} cp ${SERVER_KEY} ${certFileDirectory}
- ${GSUTIL_CMD} cp ${ROOT_CA} ${certFileDirectory}
-
- IMAGES_TO_RESTART=(-f /var/docker-compose-files/proxy-docker-compose-gce.yaml)
- DATAPROC_IMAGES_TO_RESTART=(-f /etc/proxy-docker-compose.yaml)
- if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ]; then
- IMAGES_TO_RESTART+=(-f /var/docker-compose-files/welder-docker-compose-gce.yaml)
- DATAPROC_IMAGES_TO_RESTART+=(-f /etc/welder-docker-compose.yaml)
- fi
- if [[ ! -z "$RSTUDIO_DOCKER_IMAGE" ]] ; then
- IMAGES_TO_RESTART+=(-f /var/docker-compose-files/rstudio-docker-compose-gce.yaml)
- fi
- if [[ ! -z "$JUPYTER_DOCKER_IMAGE" ]] ; then
- IMAGES_TO_RESTART+=(-f /var/docker-compose-files/jupyter-docker-compose-gce.yaml)
- DATAPROC_IMAGES_TO_RESTART+=(-f /etc/jupyter-docker-compose.yaml )
- fi
-
- if [ "${CLOUD_SERVICE}" == 'DATAPROC' ]
- then
- ${DOCKER_COMPOSE} "${DATAPROC_IMAGES_TO_RESTART[@]}" restart &> /var/start_output.txt || EXIT_CODE=$?
- else
- ${DOCKER_COMPOSE} --env-file=/var/variables.env "${IMAGES_TO_RESTART[@]}" restart &> /var/start_output.txt || EXIT_CODE=$?
- fi
-
- failScriptIfError ${GSUTIL_CMD}
- retry 3 ${GSUTIL_CMD} -h "x-goog-meta-passed":"true" cp /var/start_output.txt ${START_USER_SCRIPT_OUTPUT_URI}
- fi
-}
-
-#
-# Main
-## The PD should be the only `sd` disk that is not mounted yet
-AllsdDisks=($(lsblk --nodeps --noheadings --output NAME --paths | grep -i "sd"))
-FreesdDisks=()
-for Disk in "${AllsdDisks[@]}"; do
- Mounts="$(lsblk -no MOUNTPOINT "${Disk}")"
- if [ -z "$Mounts" ]; then
- echo "Found our unmounted persistent disk!"
- FreesdDisks="${Disk}"
- else
- echo "Not our persistent disk!"
- fi
-done
-DISK_DEVICE_ID=${FreesdDisks}
-
-## Notebook server home directories
-JUPYTER_HOME=/etc/jupyter
-RSTUDIO_SCRIPTS=/etc/rstudio/scripts
-
-if [ "${GPU_ENABLED}" == "true" ] ; then
- log 'Installing GPU driver...'
- version="535.154.05"
- isAvailable=$(cos-extensions list|grep $version)
- if [[ -z "$isAvailable" ]]; then
- # Install default version on the COS image
- cos-extensions install gpu
- else
- cos-extensions install gpu -- --version $version
- fi
-
- mount --bind /var/lib/nvidia /var/lib/nvidia
- mount -o remount,exec /var/lib/nvidia
-
- GPU_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/gpu-docker*)
- COMPLETE_JUPYTER_DOCKER_COMPOSE="-f $JUPYTER_DOCKER_COMPOSE -f $GPU_DOCKER_COMPOSE"
- COMPLETE_RSTUDIO_DOCKER_COMPOSE="-f $RSTUDIO_DOCKER_COMPOSE -f $GPU_DOCKER_COMPOSE"
-fi
-
-
-if [ "$UPDATE_WELDER" == "true" ] ; then
- echo "Upgrading welder..."
- WELDER_DOCKER_COMPOSE=$(ls ${DOCKER_COMPOSE_FILES_DIRECTORY}/welder*)
- # Make sure when runtimes restarts, they'll get a new version of welder docker compose file
- $GSUTIL_CMD cp gs://${INIT_BUCKET_NAME}/`basename ${WELDER_DOCKER_COMPOSE}` $WELDER_DOCKER_COMPOSE
-
-tee /var/welder-variables.env << END
-WORK_DIRECTORY=${WORK_DIRECTORY}
-GOOGLE_PROJECT=${GOOGLE_PROJECT}
-RUNTIME_NAME=${RUNTIME_NAME}
-OWNER_EMAIL=${OWNER_EMAIL}
-PET_SA_EMAIL=${PET_SA_EMAIL}
-WELDER_ENABLED=${WELDER_ENABLED}
-WELDER_SERVER_NAME=${WELDER_SERVER_NAME}
-WELDER_DOCKER_IMAGE=${WELDER_DOCKER_IMAGE}
-STAGING_BUCKET=${STAGING_BUCKET}
-WELDER_MEM_LIMIT=${WELDER_MEM_LIMIT}
-SHOULD_BACKGROUND_SYNC=${SHOULD_BACKGROUND_SYNC}
-END
-
- ${DOCKER_COMPOSE} -f ${WELDER_DOCKER_COMPOSE} stop
- ${DOCKER_COMPOSE} -f ${WELDER_DOCKER_COMPOSE} rm -f
- ${DOCKER_COMPOSE} --env-file=/var/welder-variables.env -f ${WELDER_DOCKER_COMPOSE} up -d &> /var/start_output.txt || EXIT_CODE=$?
-fi
-
-if [[ "${CLOUD_SERVICE}" == 'GCE' ]]; then
- # GCE
- fsck.ext4 -tvy ${DISK_DEVICE_ID}
- mkdir -p /mnt/disks/work
- mount -t ext4 -O discard,defaults ${DISK_DEVICE_ID} ${WORK_DIRECTORY}
- chmod a+rwx /mnt/disks/work
-
- # (1/6/22) Restart Jupyter Container to reset `NOTEBOOKS_DIR` for existing runtimes. This code can probably be removed after a year
- if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
- echo "Restarting Jupyter Container $GOOGLE_PROJECT / $CLUSTER_NAME..."
- # The user might have updated the runtime, which would change some environment variables like MEM_LIMIT and SHM_SIZE
-
-tee /var/variables.env << END
-JUPYTER_SERVER_NAME=${JUPYTER_SERVER_NAME}
-JUPYTER_DOCKER_IMAGE=${JUPYTER_DOCKER_IMAGE}
-NOTEBOOKS_DIR=${NOTEBOOKS_DIR}
-GOOGLE_PROJECT=${GOOGLE_PROJECT}
-RUNTIME_NAME=${RUNTIME_NAME}
-OWNER_EMAIL=${OWNER_EMAIL}
-PET_SA_EMAIL=${PET_SA_EMAIL}
-WELDER_ENABLED=${WELDER_ENABLED}
-SHM_SIZE=${SHM_SIZE}
-END
-
- # We do not want to recreate a new container, to make sure we preserve the changes that users made with the startup script
- # We only want to restart the existing container with the latest environment variables
- ${DOCKER_COMPOSE} --env-file=/var/variables.env ${COMPLETE_JUPYTER_DOCKER_COMPOSE} up -d --no-recreate
-
- # the docker containers need to be restarted or the jupyter container
- # will fail to start until the appropriate volume/device exists
- docker restart $JUPYTER_SERVER_NAME
- docker restart $WELDER_SERVER_NAME
-
- # update memory size, the memory swap must be updated as well (cannot be < memory)
- docker update --memory ${MEM_LIMIT} --memory-swap ${MEM_LIMIT} $JUPYTER_SERVER_NAME
-
- log 'Copy Jupyter frontend notebook config...'
- $GSUTIL_CMD cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /var
- JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
- docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
- fi
-
- if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- echo "Restarting Rstudio Container $GOOGLE_PROJECT / $CLUSTER_NAME..."
- # The user might have updated the runtime, which would change some environment variables like MEM_LIMIT and SHM_SIZE
-
-tee /var/variables.env << END
-WORK_DIRECTORY=${WORK_DIRECTORY}
-RSTUDIO_SERVER_NAME=${RSTUDIO_SERVER_NAME}
-RSTUDIO_DOCKER_IMAGE=${RSTUDIO_DOCKER_IMAGE}
-RSTUDIO_USER_HOME=${RSTUDIO_USER_HOME}
-GOOGLE_PROJECT=${GOOGLE_PROJECT}
-RUNTIME_NAME=${RUNTIME_NAME}
-OWNER_EMAIL=${OWNER_EMAIL}
-PET_SA_EMAIL=${PET_SA_EMAIL}
-WELDER_ENABLED=${WELDER_ENABLED}
-SHM_SIZE=${SHM_SIZE}
-END
-
- # We do not want to recreate a new container, to make sure we preserve the changes that users made with the startup script
- # We only want to restart the existing container with the latest environment variables
- ${DOCKER_COMPOSE} --env-file=/var/variables.env ${COMPLETE_RSTUDIO_DOCKER_COMPOSE} up -d --no-recreate
-
- # update memory size, the memory swap must be updated as well (cannot be < memory)
- docker update --memory ${MEM_LIMIT} --memory-swap ${MEM_LIMIT} $RSTUDIO_SERVER_NAME
-
- # the docker containers need to be restarted or the R container
- # will fail to start until the appropriate volume/device exists.
- docker restart $RSTUDIO_SERVER_NAME
- docker restart $WELDER_SERVER_NAME
-
- fi
-else
- # DATAPROC
-
- if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
- echo "Restarting Jupyter Container $GOOGLE_PROJECT / $CLUSTER_NAME..."
-
- # We do not want to recreate a new container, to make sure we preserve the changes that users made with the startup script
- ${DOCKER_COMPOSE} ${COMPLETE_JUPYTER_DOCKER_COMPOSE} up -d --no-recreate
-
- log 'Copy Jupyter frontend notebook config...'
- $GSUTIL_CMD cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /var
- JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}`
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig"
- docker cp /var/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/
-
- # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network
- # A better to do this might be to take welder host as an argument to the script
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/extensions/jupyter_delocalize.py"
- fi
-fi
-
-
-validateCert ${CERT_DIRECTORY}
-
-# If a start user script was specified, execute it now. It should already be in the docker container
-# via initialization in init-actions.sh (we explicitly do not want to recopy it from GCS on every cluster resume).
-if [ ! -z ${START_USER_SCRIPT_URI} ] ; then
- START_USER_SCRIPT=`basename ${START_USER_SCRIPT_URI}`
- log "Executing user start script [$START_USER_SCRIPT]..."
-
- if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
- if [ "$USE_GCE_STARTUP_SCRIPT" == "true" ] ; then
- docker cp /var/${START_USER_SCRIPT} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${START_USER_SCRIPT}
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} chmod +x ${JUPYTER_HOME}/${START_USER_SCRIPT}
-
- docker exec --privileged -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
- else
- docker cp /etc/${START_USER_SCRIPT} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${START_USER_SCRIPT}
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} chmod +x ${JUPYTER_HOME}/${START_USER_SCRIPT}
-
- docker exec --privileged -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_HOME}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
- fi
- fi
-
- if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- docker cp /var/${START_USER_SCRIPT} ${RSTUDIO_SERVER_NAME}:${RSTUDIO_SCRIPTS}/${START_USER_SCRIPT}
- retry 3 docker exec -u root ${RSTUDIO_SERVER_NAME} chmod +x ${RSTUDIO_SCRIPTS}/${START_USER_SCRIPT}
-
- docker exec --privileged -u root ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/${START_USER_SCRIPT} &> /var/start_output.txt || EXIT_CODE=$?
- fi
-
- failScriptIfError
-fi
-
-# By default GCE restarts containers on exit so we're not explicitly starting them below
-
-# Configuring Jupyter
-if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
- echo "Starting Jupyter on cluster $GOOGLE_PROJECT / $CLUSTER_NAME..."
- TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME}
-
- # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume
- # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter
- # kernel tries to connect to it.
- docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
-
- # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
- # This is to make it so that older images will still work after we change notebooks location to home dir
- docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
-
- # Start Jupyter server
- docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/run-jupyter.sh $NOTEBOOKS_DIR || /etc/jupyter/bin/jupyter notebook)"
-fi
-
-# Configuring RStudio, if enabled
-if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then
- echo "Starting RStudio on cluster $GOOGLE_PROJECT / $CLUSTER_NAME..."
-
- TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME}
-
- # Warm up R before starting the RStudio session (see above comment).
- docker exec $RSTUDIO_SERVER_NAME /bin/bash -c "R -e '1+1'" || true
-
- # Start RStudio server
- docker exec -d $RSTUDIO_SERVER_NAME /init
-fi
-
-# Start up crypto detector, if enabled.
-# This should be started after other containers.
-# Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network.
-# See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2
-if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then
- docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \
- --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE}
-fi
-
-# Resize persistent disk if needed.
-# If it's GCE, we resize the PD. Dataproc doesn't have PD
-if [[ "${CLOUD_SERVICE}" == 'GCE' ]]; then
- echo "Resizing persistent disk attached to runtime $GOOGLE_PROJECT / $CLUSTER_NAME if disk size changed..."
- resize2fs ${DISK_DEVICE_ID}
-fi
diff --git a/http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml b/http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml
deleted file mode 100644
index f57f564a2b..0000000000
--- a/http/src/main/resources/base-init-resources/welder-docker-compose-gce.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- welder:
- container_name: "${WELDER_SERVER_NAME}"
- image: "${WELDER_DOCKER_IMAGE}"
- ports:
- - "8080:8080"
- networks:
- - app_network
- entrypoint: "/opt/docker/bin/entrypoint.sh"
- restart: always
- environment:
- # TODO what env vars are actually needed here?
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- STAGING_BUCKET: "${STAGING_BUCKET}"
- CLUSTER_NAME: "${RUNTIME_NAME}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- OWNER_EMAIL: "${OWNER_EMAIL}"
- SHOULD_BACKGROUND_SYNC: "${SHOULD_BACKGROUND_SYNC}"
- CLOUD_PROVIDER: gcp
- LOCKING_ENABLED: "true"
- PORT: 8080
- # The following envs aren't needed for GCP. But use dummy values just so welder will be able to read config properly
- # TODO: remove as part of https://broadworkbench.atlassian.net/browse/AN-573
- WSM_URL: "dummy"
- WORKSPACE_ID: "dummy"
- STORAGE_CONTAINER_RESOURCE_ID: "dummy"
- STAGING_STORAGE_CONTAINER_RESOURCE_ID: "dummy"
- AZURE_MANAGEMENT_URL: "dummy"
- volumes:
- # shared with jupyter
- - ${WORK_DIRECTORY}:/work
- # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
- mem_limit: ${WELDER_MEM_LIMIT}
- mem_reservation: ${WELDER_MEM_LIMIT}
- # disable swap by setting it to the same value as mem_limit
- memswap_limit: ${WELDER_MEM_LIMIT}
-networks:
- app_network:
- external: true
diff --git a/http/src/main/resources/base-init-resources/welder-docker-compose.yaml b/http/src/main/resources/base-init-resources/welder-docker-compose.yaml
deleted file mode 100644
index 509f72131e..0000000000
--- a/http/src/main/resources/base-init-resources/welder-docker-compose.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Note: we need to stay on docker-compose version 2 because version 3 doesn't support
-# configuring memory options in container mode. See discussion in:
-# https://docs.docker.com/compose/compose-file/#resources
-# https://github.com/docker/compose/issues/4513
-version: '2.4'
-services:
- welder:
- container_name: "${WELDER_SERVER_NAME}"
- image: "${WELDER_DOCKER_IMAGE}"
- entrypoint: "/opt/docker/bin/entrypoint.sh"
- network_mode: host
- restart: always
- environment:
- # TODO what env vars are actually needed here?
- GOOGLE_PROJECT: "${GOOGLE_PROJECT}"
- STAGING_BUCKET: "${STAGING_BUCKET}"
- CLUSTER_NAME: "${RUNTIME_NAME}"
- RUNTIME_NAME: "${RUNTIME_NAME}"
- OWNER_EMAIL: "${OWNER_EMAIL}"
- SHOULD_BACKGROUND_SYNC: "${SHOULD_BACKGROUND_SYNC}"
- CLOUD_PROVIDER: gcp
- LOCKING_ENABLED: "true"
- PORT: 8080
- # The following envs aren't needed for GCP. But use dummy values just so welder will be able to read config properly
- # TODO: remove as part of https://broadworkbench.atlassian.net/browse/AN-573
- WSM_URL: "dummy"
- WORKSPACE_ID: "dummy"
- STORAGE_CONTAINER_RESOURCE_ID: "dummy"
- STAGING_STORAGE_CONTAINER_RESOURCE_ID: "dummy"
- AZURE_MANAGEMENT_URL: "dummy"
- volumes:
- # shared with jupyter
- - ${WORK_DIRECTORY}:/work
- # See https://docs.docker.com/engine/reference/run/#user-memory-constraints
- mem_limit: ${WELDER_MEM_LIMIT}
- mem_reservation: ${WELDER_MEM_LIMIT}
- # disable swap by setting it to the same value as mem_limit
- memswap_limit: ${WELDER_MEM_LIMIT}
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala
index 9d64028883..e3cffcb18c 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala
@@ -18,6 +18,5 @@ case class ClusterResourcesConfig(
)
object ClusterResourcesConfig {
- val path = "init-resources"
- val basePath = "base-init-resources"
+ val basePath = "init-resources"
}
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
index 2a2fcbeda3..23142bd5cb 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
@@ -112,14 +112,9 @@ class GceInterpreter[F[_]](
.fromOption(config.clusterResourcesConfig.cloudInit,
new LeoException("No cloud init file defined for GCE VM.", traceId = Some(ctx.traceId))
)
- // if the user is using the new terra-base jupyter image, a different set of init scripts is needed
- // the terra-base init scripts have some different paths etc and are within the folder 'base-init-resources'
- initResourcesPath =
- if (params.runtimeImages.exists(img => img.imageType == Jupyter && img.imageUrl.contains("terra-base")))
- ClusterResourcesConfig.basePath
else ClusterResourcesConfig.path
cloudInitFileContent = scala.io.Source
- .fromResource(s"${initResourcesPath}/${cloudInit.asString}")
+ .fromResource(s"${ClusterResourcesConfig.basePath}/${cloudInit.asString}")
.getLines()
.toList
.mkString("\n")
From 016a6c542ab6e169e9ff49cfb92f2e5c86719e1e Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 2 Dec 2025 18:55:33 -0500
Subject: [PATCH 11/22] fixes
---
.../init-resources/jupyter-docker-compose-gce.yaml | 2 +-
.../init-resources/jupyter-docker-compose.yaml | 2 +-
http/src/main/resources/init-resources/startup.sh | 10 +++++++++-
.../dsde/workbench/leonardo/util/GceInterpreter.scala | 2 --
4 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
index 375e4209ee..72018be89d 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
@@ -34,7 +34,7 @@ services:
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2
# We should remove the two lines once we no longer support older images. In the meantime, we need to be careful updating Jupyter base images.
PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
- /var/custom_env_vars.env
# See https://docs.docker.com/engine/reference/run/#user-memory-constraints
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
index 34f538e706..b20f8e41ae 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
@@ -52,7 +52,7 @@ services:
# We should remove the two lines once we no longer support older images.
# When we update base image in terra-docker next time, we should verify the paths are still valid
PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
- PATH: "/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${HOME}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
+ PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${HOME}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
- /var/custom_env_vars.env
# See https://docs.docker.com/engine/reference/run/#user-memory-constraints
diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh
index 7e90819ac2..2fb69d910d 100644
--- a/http/src/main/resources/init-resources/startup.sh
+++ b/http/src/main/resources/init-resources/startup.sh
@@ -365,8 +365,16 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# This is to make it so that older images will still work after we change notebooks location to home dir
docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
+ # Determine jupyter path based on image type
+ # images based on the new terra-base have jupyter installed in /etc/jupyter/bin/jupyter
+ if docker exec $JUPYTER_SERVER_NAME test -f /etc/jupyter/.terra-base-marker; then
+ JUPYTER_PATH="/etc/jupyter/bin/jupyter"
+ else
+ JUPYTER_PATH="/opt/conda/bin/jupyter"
+ fi
+
# Start Jupyter server
- docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/scripts/run-jupyter.sh $NOTEBOOKS_DIR || /opt/conda/bin/jupyter notebook)"
+ docker exec -d $JUPYTER_SERVER_NAME /bin/bash -c "export WELDER_ENABLED=$WELDER_ENABLED && export NOTEBOOKS_DIR=$NOTEBOOKS_DIR && (/etc/jupyter/scripts/run-jupyter.sh $NOTEBOOKS_DIR || $JUPYTER_PATH notebook)"
fi
# Configuring RStudio, if enabled
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
index 23142bd5cb..a08489221d 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GceInterpreter.scala
@@ -17,7 +17,6 @@ import org.broadinstitute.dsde.workbench.google2.{
SubnetworkName,
ZoneName
}
-import org.broadinstitute.dsde.workbench.leonardo.RuntimeImageType.Jupyter
import org.broadinstitute.dsde.workbench.leonardo.config.ClusterResourcesConfig
import org.broadinstitute.dsde.workbench.leonardo.dao.WelderDAO
import org.broadinstitute.dsde.workbench.leonardo.dao.google._
@@ -112,7 +111,6 @@ class GceInterpreter[F[_]](
.fromOption(config.clusterResourcesConfig.cloudInit,
new LeoException("No cloud init file defined for GCE VM.", traceId = Some(ctx.traceId))
)
- else ClusterResourcesConfig.path
cloudInitFileContent = scala.io.Source
.fromResource(s"${ClusterResourcesConfig.basePath}/${cloudInit.asString}")
.getLines()
From 0f54e5358f601fdcede34885724010501b34f050 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 09:51:38 -0500
Subject: [PATCH 12/22] remove dont delete
---
.../leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala
index 8a462ad22d..d6cc6e3a64 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/BaseCloudServiceRuntimeMonitor.scala
@@ -112,7 +112,7 @@ abstract class BaseCloudServiceRuntimeMonitor[F[_]] {
runtimeAndRuntimeConfig: RuntimeAndRuntimeConfig,
errorDetails: RuntimeErrorDetails,
mainInstance: Option[DataprocInstance],
- deleteRuntime: Boolean = false
+ deleteRuntime: Boolean = true
)(implicit
ev: Ask[F, AppContext]
): F[CheckResult] =
@@ -590,7 +590,7 @@ abstract class BaseCloudServiceRuntimeMonitor[F[_]] {
Some("tool_start_up")
),
mainDataprocInstance,
- false
+ deleteRuntimeOnFail
)
}
} yield r
From 9222e428b6a03abd226f74f3b923ae5bf1753a60 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 10:41:13 -0500
Subject: [PATCH 13/22] update list of images to include base
---
automation/src/test/resources/reference.conf | 1 +
.../dsde/workbench/leonardo/LeonardoConfig.scala | 2 +-
.../prepare-custom-leonardo-jupyter-dataproc-image.sh | 3 ++-
jenkins/gce-custom-images/prepare_gce_image.sh | 4 ++--
4 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/automation/src/test/resources/reference.conf b/automation/src/test/resources/reference.conf
index 53e651fe8d..fdaf0c7836 100644
--- a/automation/src/test/resources/reference.conf
+++ b/automation/src/test/resources/reference.conf
@@ -1,4 +1,5 @@
leonardo {
+ baseImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-base:0.0.1"
rImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7"
pythonImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6"
hailImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.14"
diff --git a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
index 9ae1309bf7..14edc6e362 100644
--- a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
+++ b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
@@ -12,12 +12,12 @@ object LeonardoConfig extends CommonConfig {
object Leonardo {
val apiUrl: String = leonardo.getString("apiUrl")
val notebooksServiceAccountEmail: String = leonardo.getString("notebooksServiceAccountEmail")
+ val baseImageUrl: String = leonardo.getString("baseImageUrl")
val rImageUrl: String = leonardo.getString("rImageUrl")
val pythonImageUrl: String = leonardo.getString("pythonImageUrl")
val hailImageUrl: String = leonardo.getString("hailImageUrl")
val gatkImageUrl: String = leonardo.getString("gatkImageUrl")
val aouImageUrl: String = leonardo.getString("aouImageUrl")
- val baseImageUrl: String = leonardo.getString("baseImageUrl")
val rstudioBioconductorImage =
ContainerImage(leonardo.getString("rstudioBioconductorImageUrl"), ContainerRegistry.GCR)
diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh
index ed11d0d158..b0d847a969 100755
--- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh
+++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh
@@ -17,6 +17,7 @@ set -e -x
#
# Note: You can check which version of the AOU image is used in prod here: https://github.com/all-of-us/workbench/blob/main/api/config/config_prod.json#L15C1-L16C1
+terra_base="us.gcr.io/broad-dsp-gcr-public/terra-base:0.0.1"
terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6"
terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7"
terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7"
@@ -33,7 +34,7 @@ cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.
# This array determines which of the above images are baked into the custom dataproc 2.2.x image
# the entry must match the var name above, which must correspond to a valid docker URI
-docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector"
+docker_image_var_names="welder_server terra_base terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector"
# Comment the above and uncomment this to create the dataproc 2.1.x image
# You would also need to revert the dataproc versions in the create_dataproc_image.sh like this:
diff --git a/jenkins/gce-custom-images/prepare_gce_image.sh b/jenkins/gce-custom-images/prepare_gce_image.sh
index 20d2ce15b0..7ecda9a3e9 100755
--- a/jenkins/gce-custom-images/prepare_gce_image.sh
+++ b/jenkins/gce-custom-images/prepare_gce_image.sh
@@ -15,7 +15,7 @@ set -e -x
#
# Constants and Global Vars
#
-
+terra_base="us.gcr.io/broad-dsp-gcr-public/terra-base:0.0.1"
terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6"
terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7"
terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7"
@@ -39,7 +39,7 @@ cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.
# This array determines which of the above images are baked into the custom image
# the entry must match the var name above, which must correspond to a valid docker URI
-docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_gatk terra_jupyter_aou terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector cos_gpu_installer google_cloud_toolbox docker_composer docker_composer_with_auth"
+docker_image_var_names="welder_server terra_base terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_gatk terra_jupyter_aou terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector cos_gpu_installer google_cloud_toolbox docker_composer docker_composer_with_auth"
#
# Functions
From a033f064bd3aec9735593a0fa6489ee049305712 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 11:18:24 -0500
Subject: [PATCH 14/22] gar and gcr addresses should be mapped to GAR
---
.../dsde/workbench/leonardo/LeonardoConfig.scala | 2 +-
.../workbench/leonardo/RuntimeFixtureSpec.scala | 2 +-
.../runtimes/RuntimeCreationDiskSpec.scala | 2 +-
.../leonardo/runtimes/RuntimeDataprocSpec.scala | 2 +-
.../workbench/leonardo/containerModels.scala | 9 ++++++---
.../jupyter-docker-compose-gce.yaml | 1 +
.../init-resources/jupyter-docker-compose.yaml | 1 +
.../src/main/resources/init-resources/startup.sh | 2 ++
.../workbench/leonardo/dao/HttpDockerDAO.scala | 6 +++---
.../leonardo/util/BaseRuntimeInterpreter.scala | 2 +-
.../leonardo/dao/HttpDockerDAOSpec.scala | 16 ++++++++--------
.../http/service/RuntimeServiceInterpSpec.scala | 6 +++---
.../leonardo/model/LeonardoModelSpec.scala | 16 +++++++++-------
13 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
index 14edc6e362..2b8049495c 100644
--- a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
+++ b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
@@ -19,7 +19,7 @@ object LeonardoConfig extends CommonConfig {
val gatkImageUrl: String = leonardo.getString("gatkImageUrl")
val aouImageUrl: String = leonardo.getString("aouImageUrl")
val rstudioBioconductorImage =
- ContainerImage(leonardo.getString("rstudioBioconductorImageUrl"), ContainerRegistry.GCR)
+ ContainerImage(leonardo.getString("rstudioBioconductorImageUrl"), ContainerRegistry.GAR)
private val topic = ProjectTopicName.of(gcs.getString("serviceProject"), leonardo.getString("topicName"))
val location: Location = Location(leonardo.getString("location"))
diff --git a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/RuntimeFixtureSpec.scala b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/RuntimeFixtureSpec.scala
index ce3d386012..d4352ecef7 100644
--- a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/RuntimeFixtureSpec.scala
+++ b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/RuntimeFixtureSpec.scala
@@ -115,7 +115,7 @@ trait RuntimeFixtureSpec
billingProject,
runtimeName,
getRuntimeRequest(cloudService.getOrElse(CloudService.GCE),
- toolDockerImage.map(i => ContainerImage(i, ContainerRegistry.GCR)),
+ toolDockerImage.map(i => ContainerImage(i, ContainerRegistry.GAR)),
welderRegistry
)
)
diff --git a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeCreationDiskSpec.scala b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeCreationDiskSpec.scala
index fe658fe86d..83e7c084bb 100644
--- a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeCreationDiskSpec.scala
+++ b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeCreationDiskSpec.scala
@@ -213,7 +213,7 @@ class RuntimeCreationDiskSpec extends BillingProjectFixtureSpec with ParallelTes
)
val createRuntime2Request = createRuntimeRequest.copy(toolDockerImage =
- Some(ContainerImage(LeonardoConfig.Leonardo.pythonImageUrl, ContainerRegistry.GCR))
+ Some(ContainerImage(LeonardoConfig.Leonardo.pythonImageUrl, ContainerRegistry.GAR))
) // this just needs to be a different image from default image Leonardo uses, which is gatk
val createRuntimeCloneRequest = createRuntime2Request.copy(
diff --git a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeDataprocSpec.scala b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeDataprocSpec.scala
index 5a9659aed6..0159c67794 100644
--- a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeDataprocSpec.scala
+++ b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/runtimes/RuntimeDataprocSpec.scala
@@ -106,7 +106,7 @@ class RuntimeDataprocSpec extends BillingProjectFixtureSpec with ParallelTestExe
false
)
),
- toolDockerImage = Some(ContainerImage(LeonardoConfig.Leonardo.hailImageUrl, ContainerRegistry.GCR))
+ toolDockerImage = Some(ContainerImage(LeonardoConfig.Leonardo.hailImageUrl, ContainerRegistry.GAR))
)
getRuntimeResponse <- LeonardoApiClient.createRuntimeWithWait(project, runtimeName, createRuntimeRequest)
runtime = ClusterCopy.fromGetRuntimeResponseCopy(getRuntimeResponse)
diff --git a/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala b/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
index 860051cc90..dc6d7dece3 100644
--- a/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
+++ b/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
@@ -12,9 +12,12 @@ sealed trait ContainerRegistry extends EnumEntry with Product with Serializable
object ContainerRegistry extends Enum[ContainerRegistry] {
val values = findValues
- final case object GCR extends ContainerRegistry {
- val regex: Regex =
- """^((?:us\.|eu\.|asia\.)?gcr.io)/([\w.-]+/[\w.-]+)(?::(\w[\w.-]+))?(?:@([\w+.-]+:[A-Fa-f0-9]{32,}))?$""".r
+ final case object GAR extends ContainerRegistry {
+ val regex: Regex = {
+ // with GCR switching to GAR but keeping the old addresses valid, this accepts both gcr.io and gar.io addresses
+ """^((?:us\.|eu\.|asia\.)?(?:gcr|gar)\.io)/([\w.-]+/[\w.-]+)(?::(\w[\w.-]+))?(?:@([\w+.-]+:[A-Fa-f0-9]{32,}))?$""".r
+ }
+
override def toString: String = "GCR"
}
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
index 72018be89d..0e47b21914 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml
@@ -33,6 +33,7 @@ services:
R_LIBS: "${NOTEBOOKS_DIR}/packages"
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2
# We should remove the two lines once we no longer support older images. In the meantime, we need to be careful updating Jupyter base images.
+      # NOTE: it's very important to update these paths to access any tools added or moved in the base image
PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${NOTEBOOKS_DIR}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
index b20f8e41ae..0b0b50ce60 100644
--- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
+++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml
@@ -51,6 +51,7 @@ services:
# The next two lines aren't great. But they're for updating PYTHONPATH, PATH in older than (inclusive) us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.0.2.
# We should remove the two lines once we no longer support older images.
# When we update base image in terra-docker next time, we should verify the paths are still valid
+      # NOTE: it's very important to update these paths to access any tools added or moved in the base image
PYTHONPATH: "/etc/jupyter/custom:/usr/lib/spark/python:${NOTEBOOKS_DIR}/packages"
PATH: "/root/.local/bin:/etc/jupyter/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${HOME}/.local/bin:${NOTEBOOKS_DIR}/packages/bin"
env_file:
diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh
index 2fb69d910d..6221068a8f 100644
--- a/http/src/main/resources/init-resources/startup.sh
+++ b/http/src/main/resources/init-resources/startup.sh
@@ -367,6 +367,8 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# Determine jupyter path based on image type
# images based on the new terra-base have jupyter installed in /etc/jupyter/bin/jupyter
+ # this marker is added as one of the last steps in the new base image here:
+ # https://github.com/DataBiosphere/terra-docker/blob/9aa343ae91ad1c94ca5a9f9e5bee02cc5d943b58/terra-base/Dockerfile#L235
if docker exec $JUPYTER_SERVER_NAME test -f /etc/jupyter/.terra-base-marker; then
JUPYTER_PATH="/etc/jupyter/bin/jupyter"
else
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAO.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAO.scala
index cb6bd33d82..0ba9e36c91 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAO.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAO.scala
@@ -107,7 +107,7 @@ class HttpDockerDAO[F[_]] private (httpClient: Client[F])(implicit logger: Logge
): F[Option[Token]] =
parsedImage.registry match {
// If it's a GCR repo, use the pet token
- case ContainerRegistry.GCR => F.pure(petTokenOpt.map(Token))
+ case ContainerRegistry.GAR => F.pure(petTokenOpt.map(Token))
// If it's a Dockerhub repo, need to request a token from Dockerhub
case ContainerRegistry.DockerHub =>
httpClient.expectOptionOr[Token](
@@ -150,14 +150,14 @@ class HttpDockerDAO[F[_]] private (httpClient: Client[F])(implicit logger: Logge
private[dao] def parseImage(image: ContainerImage)(implicit ev: Ask[F, TraceId]): F[ParsedImage] =
image.imageUrl match {
- case GCR.regex(registry, imageName, tagOpt, shaOpt) =>
+ case GAR.regex(registry, imageName, tagOpt, shaOpt) =>
val version = Option(tagOpt)
.map(Tag)
.orElse(Option(shaOpt).map(Sha))
for {
traceId <- ev.ask
res <- version.fold(F.raiseError[ParsedImage](ImageParseException(traceId, image)))(i =>
- F.pure(ParsedImage(GCR, Uri.unsafeFromString(s"https://$registry/v2"), imageName, i))
+ F.pure(ParsedImage(GAR, Uri.unsafeFromString(s"https://$registry/v2"), imageName, i))
)
} yield res
case DockerHub.regex(imageName, tagOpt, shaOpt) =>
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BaseRuntimeInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BaseRuntimeInterpreter.scala
index 6f4e028ecb..f697e7c52a 100644
--- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BaseRuntimeInterpreter.scala
+++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BaseRuntimeInterpreter.scala
@@ -159,7 +159,7 @@ abstract private[util] class BaseRuntimeInterpreter[F[_]](
.toRight(new Exception(s"Unable to update welder because current welder image is not available"))
.flatMap(x =>
x.registry match {
- case Some(ContainerRegistry.GCR) | Some(ContainerRegistry.GHCR) =>
+ case Some(ContainerRegistry.GAR) | Some(ContainerRegistry.GHCR) =>
Right(config.imageConfig.welderGcrImage.imageUrl)
case Some(ContainerRegistry.DockerHub) => Right(config.imageConfig.welderDockerHubImage.imageUrl)
case None => Left(new Exception(s"Unable to update Welder: registry for ${x.imageUrl} not parsable"))
diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAOSpec.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAOSpec.scala
index d329254e58..e6ff275c50 100644
--- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAOSpec.scala
+++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/dao/HttpDockerDAOSpec.scala
@@ -6,7 +6,7 @@ import cats.effect.unsafe.implicits.global
import io.circe.CursorOp.DownField
import io.circe.DecodingFailure
import io.circe.parser.decode
-import org.broadinstitute.dsde.workbench.leonardo.ContainerRegistry.{DockerHub, GCR, GHCR}
+import org.broadinstitute.dsde.workbench.leonardo.ContainerRegistry.{DockerHub, GAR, GHCR}
import org.broadinstitute.dsde.workbench.leonardo.RuntimeImageType.{Jupyter, RStudio}
import org.broadinstitute.dsde.workbench.leonardo.dao.HttpDockerDAO._
import org.broadinstitute.dsde.workbench.leonardo.model.InvalidImage
@@ -30,10 +30,10 @@ class HttpDockerDAOSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll
// "broadinstitute/leonardo-notebooks@sha256:bb959cf74f31d2a10f7bb8ee0f0754138d7c90f7ed8a92c3697ac994ff8b40b7"
// ),
// gcr with tag
- ContainerImage("us.gcr.io/broad-dsp-gcr-public/leonardo-jupyter:dev", GCR),
- ContainerImage("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6", GCR),
- ContainerImage("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7", GCR),
- ContainerImage("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9", GCR),
+ ContainerImage("us.gcr.io/broad-dsp-gcr-public/leonardo-jupyter:dev", GAR),
+ ContainerImage("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6", GAR),
+ ContainerImage("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7", GAR),
+ ContainerImage("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9", GAR),
// gcr with sha
// TODO shas are currently not working
// GCR(
@@ -55,7 +55,7 @@ class HttpDockerDAOSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll
// "rocker/rstudio@sha256:5aea617714eb38a97a21de652ab667c6d7bb486d7468a4ab6b4d515154fec383"
// ),
// gcr with tag
- ContainerImage("us.gcr.io/anvil-gcr-public/anvil-rstudio-base:0.0.1", GCR)
+ ContainerImage("us.gcr.io/anvil-gcr-public/anvil-rstudio-base:0.0.1", GAR)
// gcr with sha
// TODO shas are currently not working
// GCR(
@@ -83,7 +83,7 @@ class HttpDockerDAOSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll
}
it should s"detect ImageParseException" in withDockerDAO { dockerDAO =>
- val image = ContainerImage("us.gcr.io/anvil-gcr-public/anvil-rstudio-base", GCR) // non existent tag
+ val image = ContainerImage("us.gcr.io/anvil-gcr-public/anvil-rstudio-base", GAR) // non existent tag
val res = for {
ctx <- appContext.ask[AppContext]
response <- dockerDAO.detectTool(image, None, ctx.now).attempt
@@ -93,7 +93,7 @@ class HttpDockerDAOSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll
it should s"detect invalid GCR image if image doesn't have proper environment variables set" in withDockerDAO {
dockerDAO =>
- val image = ContainerImage("us.gcr.io/broad-dsp-gcr-public/welder-server:latest", GCR) // not a supported tool
+ val image = ContainerImage("us.gcr.io/broad-dsp-gcr-public/welder-server:latest", GAR) // not a supported tool
val res = for {
ctx <- appContext.ask[AppContext]
response <- dockerDAO.detectTool(image, None, ctx.now).attempt
diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/RuntimeServiceInterpSpec.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/RuntimeServiceInterpSpec.scala
index 92c030a7ae..841a5de6e6 100644
--- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/RuntimeServiceInterpSpec.scala
+++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/RuntimeServiceInterpSpec.scala
@@ -378,7 +378,7 @@ class RuntimeServiceInterpTest
it should "successfully create a cluster with an rstudio image" in isolatedDbTest {
val cloudContext = CloudContext.Gcp(GoogleProject("googleProject"))
val runtimeName = RuntimeName("clusterName2")
- val rstudioImage = ContainerImage("some-rstudio-image", ContainerRegistry.GCR)
+ val rstudioImage = ContainerImage("some-rstudio-image", ContainerRegistry.GAR)
val request = emptyCreateRuntimeReq.copy(
toolDockerImage = Some(rstudioImage)
)
@@ -564,7 +564,7 @@ class RuntimeServiceInterpTest
userInfo,
cloudContext,
runtimeName2,
- emptyCreateRuntimeReq.copy(welderRegistry = Some(ContainerRegistry.GCR))
+ emptyCreateRuntimeReq.copy(welderRegistry = Some(ContainerRegistry.GAR))
)
.attempt
r3 <- runtimeService
@@ -634,7 +634,7 @@ class RuntimeServiceInterpTest
userInfo,
cloudContext,
runtimeName2,
- emptyCreateRuntimeReq.copy(welderRegistry = Some(ContainerRegistry.GCR))
+ emptyCreateRuntimeReq.copy(welderRegistry = Some(ContainerRegistry.GAR))
)
.attempt
diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/model/LeonardoModelSpec.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/model/LeonardoModelSpec.scala
index 9092a7cf04..6512ca3b20 100644
--- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/model/LeonardoModelSpec.scala
+++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/model/LeonardoModelSpec.scala
@@ -169,14 +169,16 @@ class LeonardoModelSpec extends LeonardoTestSuite with AnyFlatSpecLike {
}
"DockerRegistry regex" should "match expected image url format" in {
- ContainerRegistry.GCR.regex.pattern.asPredicate().test("us.gcr.io/google/ubuntu1804:latest") shouldBe true
- ContainerRegistry.GCR.regex.pattern.asPredicate().test("us.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
- ContainerRegistry.GCR.regex.pattern.asPredicate().test("us/broad-dsp-gcr-public/ubuntu1804") shouldBe false
- ContainerRegistry.GCR.regex.pattern.asPredicate().test("eu.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
- ContainerRegistry.GCR.regex.pattern
+ ContainerRegistry.GAR.regex.pattern.asPredicate().test("us.gcr.io/google/ubuntu1804:latest") shouldBe true
+ ContainerRegistry.GAR.regex.pattern.asPredicate().test("us.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
+ ContainerRegistry.GAR.regex.pattern.asPredicate().test("us.gar.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
+ ContainerRegistry.GAR.regex.pattern.asPredicate().test("us/broad-dsp-gcr-public/ubuntu1804") shouldBe false
+ ContainerRegistry.GAR.regex.pattern.asPredicate().test("eu.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
+ ContainerRegistry.GAR.regex.pattern.asPredicate().test("eu.gar.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
+ ContainerRegistry.GAR.regex.pattern
.asPredicate()
.test("asia.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe true
- ContainerRegistry.GCR.regex.pattern
+ ContainerRegistry.GAR.regex.pattern
.asPredicate()
.test("unknown.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe false
@@ -212,7 +214,7 @@ class LeonardoModelSpec extends LeonardoTestSuite with AnyFlatSpecLike {
"ContainerImage.stringToJupyterDockerImage" should "match GCR first, and then dockerhub" in {
ContainerImage.fromImageUrl("us.gcr.io/broad-dsp-gcr-public/ubuntu1804") shouldBe (Some(
- ContainerImage("us.gcr.io/broad-dsp-gcr-public/ubuntu1804", ContainerRegistry.GCR)
+ ContainerImage("us.gcr.io/broad-dsp-gcr-public/ubuntu1804", ContainerRegistry.GAR)
))
ContainerImage.fromImageUrl("asd/asdf") shouldBe (Some(
ContainerImage("asd/asdf", ContainerRegistry.DockerHub)
From 4af7443a8377ffca9f1c96877dabfc4496441c13 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 11:52:27 -0500
Subject: [PATCH 15/22] fmt
---
.../dsde/workbench/leonardo/LeonardoConfig.scala | 2 +-
.../dsde/workbench/leonardo/containerModels.scala | 3 +--
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
index 2b8049495c..2232a47c87 100644
--- a/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
+++ b/automation/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/LeonardoConfig.scala
@@ -12,7 +12,7 @@ object LeonardoConfig extends CommonConfig {
object Leonardo {
val apiUrl: String = leonardo.getString("apiUrl")
val notebooksServiceAccountEmail: String = leonardo.getString("notebooksServiceAccountEmail")
- val baseImageUrl: String = leonardo.getString("baseImageUrl")
+ val baseImageUrl: String = leonardo.getString("baseImageUrl")
val rImageUrl: String = leonardo.getString("rImageUrl")
val pythonImageUrl: String = leonardo.getString("pythonImageUrl")
val hailImageUrl: String = leonardo.getString("hailImageUrl")
diff --git a/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala b/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
index dc6d7dece3..af71c64321 100644
--- a/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
+++ b/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
@@ -13,10 +13,9 @@ object ContainerRegistry extends Enum[ContainerRegistry] {
val values = findValues
final case object GAR extends ContainerRegistry {
- val regex: Regex = {
+ val regex: Regex =
// with GCR switching to GAR but keeping the old addresses valid, this accepts both gcr.io and gar.io addresses
"""^((?:us\.|eu\.|asia\.)?(?:gcr|gar)\.io)/([\w.-]+/[\w.-]+)(?::(\w[\w.-]+))?(?:@([\w+.-]+:[A-Fa-f0-9]{32,}))?$""".r
- }
override def toString: String = "GCR"
}
From dbc25acc06f8f3a93cc0ebe46d79bda0228aa372 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Thu, 8 Jan 2026 19:54:14 -0500
Subject: [PATCH 16/22] switch to version 1.0.0
---
automation/src/test/resources/reference.conf | 2 +-
.../dsde/workbench/leonardo/containerModels.scala | 2 +-
http/src/main/resources/init-resources/init-actions.sh | 7 +++++--
.../prepare-custom-leonardo-jupyter-dataproc-image.sh | 2 +-
jenkins/gce-custom-images/prepare_gce_image.sh | 2 +-
5 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/automation/src/test/resources/reference.conf b/automation/src/test/resources/reference.conf
index fdaf0c7836..3879d82dc0 100644
--- a/automation/src/test/resources/reference.conf
+++ b/automation/src/test/resources/reference.conf
@@ -1,5 +1,5 @@
leonardo {
- baseImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-base:0.0.1"
+ baseImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-base:1.0.0"
rImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7"
pythonImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6"
hailImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.14"
diff --git a/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala b/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
index af71c64321..44e8994104 100644
--- a/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
+++ b/core/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/containerModels.scala
@@ -17,7 +17,7 @@ object ContainerRegistry extends Enum[ContainerRegistry] {
// with GCR switching to GAR but keeping the old addresses valid, this accepts both gcr.io and gar.io addresses
"""^((?:us\.|eu\.|asia\.)?(?:gcr|gar)\.io)/([\w.-]+/[\w.-]+)(?::(\w[\w.-]+))?(?:@([\w+.-]+:[A-Fa-f0-9]{32,}))?$""".r
- override def toString: String = "GCR"
+ override def toString: String = "GAR"
}
final case object GHCR extends ContainerRegistry {
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index a48d0f2d99..aa8b9b1aa2 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -275,7 +275,7 @@ EOF
# If any image is hosted in a GAR registry (detected by regex) then
# authorize docker to interact with gcr.io.
# NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect.
- if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then
+    if grep -qE "gcr\.io|gar\.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then
log 'Authorizing GCR/GAR...'
gcloud auth configure-docker
fi
@@ -329,7 +329,10 @@ EOF
# Jupyter-specific setup, only do if Jupyter is installed
if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then
- log 'Installing Jupydocker kernelspecs...'
+ log 'Installing Jupyter kernelspecs...'
+ # Install kernelspecs inside the Jupyter container
+ retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel ${KERNELSPEC_HOME}
+
# Install notebook.json
if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then
diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh
index b0d847a969..5ca6351bfd 100755
--- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh
+++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh
@@ -17,7 +17,7 @@ set -e -x
#
# Note: You can check which version of the AOU image is used in prod here: https://github.com/all-of-us/workbench/blob/main/api/config/config_prod.json#L15C1-L16C1
-terra_base="us.gcr.io/broad-dsp-gcr-public/terra-base:0.0.1"
+terra_base="us.gcr.io/broad-dsp-gcr-public/terra-base:1.0.0"
terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6"
terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7"
terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7"
diff --git a/jenkins/gce-custom-images/prepare_gce_image.sh b/jenkins/gce-custom-images/prepare_gce_image.sh
index 7ecda9a3e9..637debd33a 100755
--- a/jenkins/gce-custom-images/prepare_gce_image.sh
+++ b/jenkins/gce-custom-images/prepare_gce_image.sh
@@ -15,7 +15,7 @@ set -e -x
#
# Constants and Global Vars
#
-terra_base="us.gcr.io/broad-dsp-gcr-public/terra-base:0.0.1"
+terra_base="us.gcr.io/broad-dsp-gcr-public/terra-base:1.0.0"
terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6"
terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7"
terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7"
From d718de7028a189519a2bb07da732a407db52adcc Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 9 Jan 2026 16:59:40 -0500
Subject: [PATCH 17/22] trigger new test suite
---
http/src/main/resources/init-resources/gce-init.sh | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 50196db598..23ca853a58 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -386,6 +386,10 @@ fi
# This needs to happen before we start up containers because the jupyter user needs to be the owner of the PD
chmod a+rwx ${WORK_DIRECTORY}
+# make sure permissions are set so that the jupyter user can read/write to the .jupyter directory
+sudo chown -R jupyter:user ~/home/jupyter/.jupyter
+
+
# Docker compose up, starting all of the containers
${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" up -d
From d1188a8502dd8c2e656c68931134c46fee318ff3 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 9 Jan 2026 18:28:56 -0500
Subject: [PATCH 18/22] fix
---
http/src/main/resources/init-resources/gce-init.sh | 7 +++----
http/src/main/resources/init-resources/init-actions.sh | 4 ++++
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 23ca853a58..bc378bb25a 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -386,10 +386,6 @@ fi
# This needs to happen before we start up containers because the jupyter user needs to be the owner of the PD
chmod a+rwx ${WORK_DIRECTORY}
-# make sure permissions are set so that the jupyter user can read/write to the .jupyter directory
-sudo chown -R jupyter:user ~/home/jupyter/.jupyter
-
-
# Docker compose up, starting all of the containers
${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" up -d
@@ -548,6 +544,9 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
&& cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
+# make sure permissions are set so that the jupyter user owns the .jupyter directory
+ docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:user ~/home/jupyter/.jupyter"
+
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
# This is to make it so that older images will still work after we change notebooks location to home dir
docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index aa8b9b1aa2..96fa9267e6 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -486,6 +486,10 @@ EOF
&& cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
+ # make sure permissions are set so that the jupyter user owns the .jupyter directory
+ docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:user ~/home/jupyter/.jupyter"
+
+
log 'Starting Jupyter Notebook...'
retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}"
From fa5746f1c8c0ae718f3e37413074b666b66651a6 Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 9 Jan 2026 19:54:46 -0500
Subject: [PATCH 19/22] again
---
http/src/main/resources/init-resources/gce-init.sh | 4 ++--
http/src/main/resources/init-resources/init-actions.sh | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index bc378bb25a..7b2d1b0dda 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -544,8 +544,8 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
&& cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
-# make sure permissions are set so that the jupyter user owns the .jupyter directory
- docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:user ~/home/jupyter/.jupyter"
+ # make sure permissions are set so that the jupyter user owns the .jupyter directory
+ docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:users ~$JUPYTER_USER_HOME/.jupyter"
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
# This is to make it so that older images will still work after we change notebooks location to home dir
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index 96fa9267e6..1d0d82bd46 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -486,8 +486,8 @@ EOF
&& cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
- # make sure permissions are set so that the jupyter user owns the .jupyter directory
- docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:user ~/home/jupyter/.jupyter"
+ # give the jupyter user ownership of the .jupyter directory for extensions to function properly
+ docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:users ~$JUPYTER_HOME/.jupyter"
log 'Starting Jupyter Notebook...'
From 3369bc2f6e835d90b450eaa6cb23c2d7ae5a151f Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:20:50 -0500
Subject: [PATCH 20/22] anotha
---
http/src/main/resources/init-resources/gce-init.sh | 4 +---
http/src/main/resources/init-resources/init-actions.sh | 5 +----
2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index 7b2d1b0dda..aa22a1428f 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -537,6 +537,7 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# Starts the locking logic (used for AOU). google_sign_in.js is likely not used anymore
docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
&& mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && chown -R jupyter:users ~$JUPYTER_USER_HOME/.jupyter \
&& cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
&& cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
@@ -544,9 +545,6 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
&& cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
- # make sure permissions are set so that the jupyter user owns the .jupyter directory
- docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:users ~$JUPYTER_USER_HOME/.jupyter"
-
# In new jupyter images, we should update jupyter_notebook_config.py in terra-docker.
# This is to make it so that older images will still work after we change notebooks location to home dir
docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index 1d0d82bd46..2bfde63966 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -479,6 +479,7 @@ EOF
# Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU)
docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
&& mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
+ && chown -R jupyter:users ~$JUPYTER_USER_HOME/.jupyter \
&& cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
&& cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
@@ -486,10 +487,6 @@ EOF
&& cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& mkdir -p $JUPYTER_HOME/nbconfig"
- # give the jupyter user ownership of the .jupyter directory for extensions to function properly
- docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "chown -R jupyter:users ~$JUPYTER_HOME/.jupyter"
-
-
log 'Starting Jupyter Notebook...'
retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}"
From 4d802c48791c27a4494aef7c5e5b1489e6fba59f Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:53:00 -0500
Subject: [PATCH 21/22] Remove stray '~' before $JUPYTER_USER_HOME in chown paths
---
http/src/main/resources/init-resources/gce-init.sh | 2 +-
http/src/main/resources/init-resources/init-actions.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh
index aa22a1428f..1cee09dee7 100644
--- a/http/src/main/resources/init-resources/gce-init.sh
+++ b/http/src/main/resources/init-resources/gce-init.sh
@@ -537,7 +537,7 @@ if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then
# Starts the locking logic (used for AOU). google_sign_in.js is likely not used anymore
docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
&& mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
- && chown -R jupyter:users ~$JUPYTER_USER_HOME/.jupyter \
+ && chown -R jupyter:users $JUPYTER_USER_HOME/.jupyter \
&& cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
&& cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index 2bfde63966..29c505f253 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -479,7 +479,7 @@ EOF
# Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU)
docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \
&& mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \
- && chown -R jupyter:users ~$JUPYTER_USER_HOME/.jupyter \
+ && chown -R jupyter:users $JUPYTER_USER_HOME/.jupyter \
&& cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \
&& ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \
&& cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \
From 8114052003e8bde0b565a07cc533cc8379c9e55c Mon Sep 17 00:00:00 2001
From: lmcnatt <85642387+lucymcnatt@users.noreply.github.com>
Date: Tue, 13 Jan 2026 17:45:48 -0500
Subject: [PATCH 22/22] Remove obsolete kernelspec installation from init-actions.sh
---
http/src/main/resources/init-resources/init-actions.sh | 4 ----
1 file changed, 4 deletions(-)
diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh
index 29c505f253..b665ba6dfb 100644
--- a/http/src/main/resources/init-resources/init-actions.sh
+++ b/http/src/main/resources/init-resources/init-actions.sh
@@ -329,10 +329,6 @@ EOF
# Jupyter-specific setup, only do if Jupyter is installed
if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then
- log 'Installing Jupyter kernelspecs...'
- # Install kernelspecs inside the Jupyter container
- retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/kernel/kernelspec.sh ${JUPYTER_SCRIPTS}/kernel ${KERNELSPEC_HOME}
-
# Install notebook.json
if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then